tgsi/lowering: improved lowering for XPD
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33
34 #include "tgsi_lowering.h"
35
36 struct tgsi_lowering_context {
37 struct tgsi_transform_context base;
38 const struct tgsi_lowering_config *config;
39 struct tgsi_shader_info *info;
40 unsigned two_side_colors;
41 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
43 int face_idx;
44 unsigned numtmp;
45 struct {
46 struct tgsi_full_src_register src;
47 struct tgsi_full_dst_register dst;
48 } tmp[2];
49 #define A 0
50 #define B 1
51 struct tgsi_full_src_register imm;
52 int emitted_decls;
53 unsigned saturate;
54 };
55
/*
 * Recover the lowering context from the generic transform context that
 * is embedded as its first member.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lctx = (struct tgsi_lowering_context *)tctx;

   return lctx;
}
61
62 /*
63 * Utility helpers:
64 */
65
66 static void
67 reg_dst(struct tgsi_full_dst_register *dst,
68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70 *dst = *orig_dst;
71 dst->Register.WriteMask &= wrmask;
72 assert(dst->Register.WriteMask);
73 }
74
75 static inline void
76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78 swiz[0] = src->SwizzleX;
79 swiz[1] = src->SwizzleY;
80 swiz[2] = src->SwizzleZ;
81 swiz[3] = src->SwizzleW;
82 }
83
84 static void
85 reg_src(struct tgsi_full_src_register *src,
86 const struct tgsi_full_src_register *orig_src,
87 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89 unsigned swiz[4];
90 get_swiz(swiz, &orig_src->Register);
91 *src = *orig_src;
92 src->Register.SwizzleX = swiz[sx];
93 src->Register.SwizzleY = swiz[sy];
94 src->Register.SwizzleZ = swiz[sz];
95 src->Register.SwizzleW = swiz[sw];
96 }
97
/*
 * SWIZ(x,y,z,w) expands to the four comma-separated TGSI_SWIZZLE_*
 * arguments expected by reg_src().  A channel written as '_' is one
 * whose value is not going to be looked at; it maps to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__   TGSI_SWIZZLE_X   /* don't-care value! */
#define SWIZ(x, y, z, w)                          \
   TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,        \
   TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101
102 /*
103 * if (dst.x aliases src.x) {
104 * MOV tmpA.x, src.x
105 * src = tmpA
106 * }
107 * COS dst.x, src.x
108 * SIN dst.y, src.x
109 * MOV dst.zw, imm{0.0, 1.0}
110 */
111 static bool
112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115 if ((dst->Register.File == src->Register.File) &&
116 (dst->Register.Index == src->Register.Index)) {
117 unsigned i, actual_mask = 0;
118 unsigned swiz[4];
119 get_swiz(swiz, &src->Register);
120 for (i = 0; i < 4; i++)
121 if (src_mask & (1 << i))
122 actual_mask |= (1 << swiz[i]);
123 if (actual_mask & dst_mask)
124 return true;
125 }
126 return false;
127 }
128
129 static void
130 create_mov(struct tgsi_transform_context *tctx,
131 const struct tgsi_full_dst_register *dst,
132 const struct tgsi_full_src_register *src,
133 unsigned mask, unsigned saturate)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.Saturate = saturate;
140 new_inst.Instruction.NumDstRegs = 1;
141 reg_dst(&new_inst.Dst[0], dst, mask);
142 new_inst.Instruction.NumSrcRegs = 1;
143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144 tctx->emit_instruction(tctx, &new_inst);
145 }
146
/* Token accounting used to size the output of a lowering.  The estimate
 * is deliberately pessimistic: an instruction that gets removed is
 * assumed to be as small as possible (no ADDR[] or anything else that
 * adds tokens to a src/dst), while every instruction that gets inserted
 * is assumed to need the extra token on each operand.
 *
 *   OINST(nargs) - old (removed) instruction
 *      1         : instruction itself
 *      1         : dst
 *      1 * nargs : srcN
 *
 *   NINST(nargs) - new (inserted) instruction
 *      1         : instruction itself
 *      2         : dst
 *      2 * nargs : srcN
 */

#define OINST(nargs)    (2 + (nargs))
#define NINST(nargs)    (3 + 2 * (nargs))
165
166 /*
167 * Lowering Translators:
168 */
169
170 /* DST - Distance Vector
171 * dst.x = 1.0
172 * dst.y = src0.y \times src1.y
173 * dst.z = src0.z
174 * dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
181 * src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
185 * src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP 2
195 static void
196 transform_dst(struct tgsi_transform_context *tctx,
197 struct tgsi_full_instruction *inst)
198 {
199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200 struct tgsi_full_dst_register *dst = &inst->Dst[0];
201 struct tgsi_full_src_register *src0 = &inst->Src[0];
202 struct tgsi_full_src_register *src1 = &inst->Src[1];
203 struct tgsi_full_instruction new_inst;
204
205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207 src0 = &ctx->tmp[A].src;
208 }
209
210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212 src1 = &ctx->tmp[B].src;
213 }
214
215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst = tgsi_default_full_instruction();
218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219 new_inst.Instruction.NumDstRegs = 1;
220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221 new_inst.Instruction.NumSrcRegs = 2;
222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224 tctx->emit_instruction(tctx, &new_inst);
225 }
226
227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228 /* MOV dst.z, src0.z */
229 new_inst = tgsi_default_full_instruction();
230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231 new_inst.Instruction.NumDstRegs = 1;
232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233 new_inst.Instruction.NumSrcRegs = 1;
234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235 tctx->emit_instruction(tctx, &new_inst);
236 }
237
238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239 /* MOV dst.w, src1.w */
240 new_inst = tgsi_default_full_instruction();
241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242 new_inst.Instruction.NumDstRegs = 1;
243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244 new_inst.Instruction.NumSrcRegs = 1;
245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246 tctx->emit_instruction(tctx, &new_inst);
247 }
248
249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst = tgsi_default_full_instruction();
252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253 new_inst.Instruction.NumDstRegs = 1;
254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255 new_inst.Instruction.NumSrcRegs = 1;
256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257 tctx->emit_instruction(tctx, &new_inst);
258 }
259 }
260
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
265 * dst.w = 1.0
266 *
267 * ; needs: 1 tmp, imm{1.0}
268 * MUL tmpA.xyz, src1.yzx, src0.zxy
269 * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
270 * MOV dst.w, imm{1.0}
271 */
272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
273 #define XPD_TMP 1
274 static void
275 transform_xpd(struct tgsi_transform_context *tctx,
276 struct tgsi_full_instruction *inst)
277 {
278 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
279 struct tgsi_full_dst_register *dst = &inst->Dst[0];
280 struct tgsi_full_src_register *src0 = &inst->Src[0];
281 struct tgsi_full_src_register *src1 = &inst->Src[1];
282 struct tgsi_full_instruction new_inst;
283
284 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
285 /* MUL tmpA.xyz, src1.yzx, src0.zxy */
286 new_inst = tgsi_default_full_instruction();
287 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
288 new_inst.Instruction.NumDstRegs = 1;
289 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
290 new_inst.Instruction.NumSrcRegs = 2;
291 reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
292 reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
293 tctx->emit_instruction(tctx, &new_inst);
294
295 /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
296 new_inst = tgsi_default_full_instruction();
297 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
298 new_inst.Instruction.NumDstRegs = 1;
299 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
300 new_inst.Instruction.NumSrcRegs = 3;
301 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
302 reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
303 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
304 new_inst.Src[2].Register.Negate = true;
305 tctx->emit_instruction(tctx, &new_inst);
306 }
307
308 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
309 /* MOV dst.w, imm{1.0} */
310 new_inst = tgsi_default_full_instruction();
311 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
312 new_inst.Instruction.NumDstRegs = 1;
313 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
314 new_inst.Instruction.NumSrcRegs = 1;
315 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
316 tctx->emit_instruction(tctx, &new_inst);
317 }
318 }
319
320 /* SCS - Sine Cosine
321 * dst.x = \cos{src.x}
322 * dst.y = \sin{src.x}
323 * dst.z = 0.0
324 * dst.w = 1.0
325 *
326 * ; needs: 1 tmp, imm{0.0, 1.0}
327 * if (dst.x aliases src.x) {
328 * MOV tmpA.x, src.x
329 * src = tmpA
330 * }
331 * COS dst.x, src.x
332 * SIN dst.y, src.x
333 * MOV dst.zw, imm{0.0, 1.0}
334 */
335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
336 #define SCS_TMP 1
337 static void
338 transform_scs(struct tgsi_transform_context *tctx,
339 struct tgsi_full_instruction *inst)
340 {
341 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
342 struct tgsi_full_dst_register *dst = &inst->Dst[0];
343 struct tgsi_full_src_register *src = &inst->Src[0];
344 struct tgsi_full_instruction new_inst;
345
346 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
347 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
348 src = &ctx->tmp[A].src;
349 }
350
351 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
352 /* COS dst.x, src.x */
353 new_inst = tgsi_default_full_instruction();
354 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
355 new_inst.Instruction.NumDstRegs = 1;
356 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
357 new_inst.Instruction.NumSrcRegs = 1;
358 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
359 tctx->emit_instruction(tctx, &new_inst);
360 }
361
362 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
363 /* SIN dst.y, src.x */
364 new_inst = tgsi_default_full_instruction();
365 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
366 new_inst.Instruction.NumDstRegs = 1;
367 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
368 new_inst.Instruction.NumSrcRegs = 1;
369 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
370 tctx->emit_instruction(tctx, &new_inst);
371 }
372
373 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
374 /* MOV dst.zw, imm{0.0, 1.0} */
375 new_inst = tgsi_default_full_instruction();
376 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
377 new_inst.Instruction.NumDstRegs = 1;
378 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
379 new_inst.Instruction.NumSrcRegs = 1;
380 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
381 tctx->emit_instruction(tctx, &new_inst);
382 }
383 }
384
385 /* LRP - Linear Interpolate
386 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
387 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
388 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
389 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
390 *
391 * ; needs: 2 tmp, imm{1.0}
392 * MUL tmpA, src0, src1
393 * SUB tmpB, imm{1.0}, src0
394 * MUL tmpB, tmpB, src2
395 * ADD dst, tmpA, tmpB
396 */
397 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
398 #define LRP_TMP 2
399 static void
400 transform_lrp(struct tgsi_transform_context *tctx,
401 struct tgsi_full_instruction *inst)
402 {
403 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
404 struct tgsi_full_dst_register *dst = &inst->Dst[0];
405 struct tgsi_full_src_register *src0 = &inst->Src[0];
406 struct tgsi_full_src_register *src1 = &inst->Src[1];
407 struct tgsi_full_src_register *src2 = &inst->Src[2];
408 struct tgsi_full_instruction new_inst;
409
410 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
411 /* MUL tmpA, src0, src1 */
412 new_inst = tgsi_default_full_instruction();
413 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
414 new_inst.Instruction.NumDstRegs = 1;
415 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
416 new_inst.Instruction.NumSrcRegs = 2;
417 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
418 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
419 tctx->emit_instruction(tctx, &new_inst);
420
421 /* SUB tmpB, imm{1.0}, src0 */
422 new_inst = tgsi_default_full_instruction();
423 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
424 new_inst.Instruction.NumDstRegs = 1;
425 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
426 new_inst.Instruction.NumSrcRegs = 2;
427 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
428 reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
429 tctx->emit_instruction(tctx, &new_inst);
430
431 /* MUL tmpB, tmpB, src2 */
432 new_inst = tgsi_default_full_instruction();
433 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
434 new_inst.Instruction.NumDstRegs = 1;
435 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
436 new_inst.Instruction.NumSrcRegs = 2;
437 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
438 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
439 tctx->emit_instruction(tctx, &new_inst);
440
441 /* ADD dst, tmpA, tmpB */
442 new_inst = tgsi_default_full_instruction();
443 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
444 new_inst.Instruction.NumDstRegs = 1;
445 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
446 new_inst.Instruction.NumSrcRegs = 2;
447 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
448 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
449 tctx->emit_instruction(tctx, &new_inst);
450 }
451 }
452
453 /* FRC - Fraction
454 * dst.x = src.x - \lfloor src.x\rfloor
455 * dst.y = src.y - \lfloor src.y\rfloor
456 * dst.z = src.z - \lfloor src.z\rfloor
457 * dst.w = src.w - \lfloor src.w\rfloor
458 *
459 * ; needs: 1 tmp
460 * FLR tmpA, src
461 * SUB dst, src, tmpA
462 */
463 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
464 #define FRC_TMP 1
465 static void
466 transform_frc(struct tgsi_transform_context *tctx,
467 struct tgsi_full_instruction *inst)
468 {
469 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
470 struct tgsi_full_dst_register *dst = &inst->Dst[0];
471 struct tgsi_full_src_register *src = &inst->Src[0];
472 struct tgsi_full_instruction new_inst;
473
474 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
475 /* FLR tmpA, src */
476 new_inst = tgsi_default_full_instruction();
477 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
478 new_inst.Instruction.NumDstRegs = 1;
479 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
480 new_inst.Instruction.NumSrcRegs = 1;
481 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
482 tctx->emit_instruction(tctx, &new_inst);
483
484 /* SUB dst, src, tmpA */
485 new_inst = tgsi_default_full_instruction();
486 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
487 new_inst.Instruction.NumDstRegs = 1;
488 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
489 new_inst.Instruction.NumSrcRegs = 2;
490 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
491 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
492 tctx->emit_instruction(tctx, &new_inst);
493 }
494 }
495
496 /* POW - Power
497 * dst.x = src0.x^{src1.x}
498 * dst.y = src0.x^{src1.x}
499 * dst.z = src0.x^{src1.x}
500 * dst.w = src0.x^{src1.x}
501 *
502 * ; needs: 1 tmp
503 * LG2 tmpA.x, src0.x
504 * MUL tmpA.x, src1.x, tmpA.x
505 * EX2 dst, tmpA.x
506 */
507 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
508 #define POW_TMP 1
509 static void
510 transform_pow(struct tgsi_transform_context *tctx,
511 struct tgsi_full_instruction *inst)
512 {
513 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
514 struct tgsi_full_dst_register *dst = &inst->Dst[0];
515 struct tgsi_full_src_register *src0 = &inst->Src[0];
516 struct tgsi_full_src_register *src1 = &inst->Src[1];
517 struct tgsi_full_instruction new_inst;
518
519 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
520 /* LG2 tmpA.x, src0.x */
521 new_inst = tgsi_default_full_instruction();
522 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
523 new_inst.Instruction.NumDstRegs = 1;
524 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
525 new_inst.Instruction.NumSrcRegs = 1;
526 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
527 tctx->emit_instruction(tctx, &new_inst);
528
529 /* MUL tmpA.x, src1.x, tmpA.x */
530 new_inst = tgsi_default_full_instruction();
531 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
532 new_inst.Instruction.NumDstRegs = 1;
533 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
534 new_inst.Instruction.NumSrcRegs = 2;
535 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
536 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
537 tctx->emit_instruction(tctx, &new_inst);
538
539 /* EX2 dst, tmpA.x */
540 new_inst = tgsi_default_full_instruction();
541 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
542 new_inst.Instruction.NumDstRegs = 1;
543 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
544 new_inst.Instruction.NumSrcRegs = 1;
545 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
546 tctx->emit_instruction(tctx, &new_inst);
547 }
548 }
549
550 /* LIT - Light Coefficients
551 * dst.x = 1.0
552 * dst.y = max(src.x, 0.0)
553 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
554 * dst.w = 1.0
555 *
556 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
557 * MAX tmpA.xy, src.xy, imm{0.0}
558 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
559 * LG2 tmpA.y, tmpA.y
560 * MUL tmpA.y, tmpA.z, tmpA.y
561 * EX2 tmpA.y, tmpA.y
562 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
563 * MOV dst.yz, tmpA.xy
564 * MOV dst.xw, imm{1.0}
565 */
566 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
567 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
568 #define LIT_TMP 1
569 static void
570 transform_lit(struct tgsi_transform_context *tctx,
571 struct tgsi_full_instruction *inst)
572 {
573 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
574 struct tgsi_full_dst_register *dst = &inst->Dst[0];
575 struct tgsi_full_src_register *src = &inst->Src[0];
576 struct tgsi_full_instruction new_inst;
577
578 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
579 /* MAX tmpA.xy, src.xy, imm{0.0} */
580 new_inst = tgsi_default_full_instruction();
581 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
582 new_inst.Instruction.NumDstRegs = 1;
583 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
584 new_inst.Instruction.NumSrcRegs = 2;
585 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
586 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
587 tctx->emit_instruction(tctx, &new_inst);
588
589 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
590 new_inst = tgsi_default_full_instruction();
591 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
592 new_inst.Instruction.NumDstRegs = 1;
593 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
594 new_inst.Instruction.NumSrcRegs = 3;
595 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
596 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
597 new_inst.Src[1].Register.Negate = true;
598 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
599 tctx->emit_instruction(tctx, &new_inst);
600
601 /* LG2 tmpA.y, tmpA.y */
602 new_inst = tgsi_default_full_instruction();
603 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
604 new_inst.Instruction.NumDstRegs = 1;
605 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
606 new_inst.Instruction.NumSrcRegs = 1;
607 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
608 tctx->emit_instruction(tctx, &new_inst);
609
610 /* MUL tmpA.y, tmpA.z, tmpA.y */
611 new_inst = tgsi_default_full_instruction();
612 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
613 new_inst.Instruction.NumDstRegs = 1;
614 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
615 new_inst.Instruction.NumSrcRegs = 2;
616 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
617 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
618 tctx->emit_instruction(tctx, &new_inst);
619
620 /* EX2 tmpA.y, tmpA.y */
621 new_inst = tgsi_default_full_instruction();
622 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
623 new_inst.Instruction.NumDstRegs = 1;
624 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
625 new_inst.Instruction.NumSrcRegs = 1;
626 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
627 tctx->emit_instruction(tctx, &new_inst);
628
629 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
630 new_inst = tgsi_default_full_instruction();
631 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
632 new_inst.Instruction.NumDstRegs = 1;
633 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
634 new_inst.Instruction.NumSrcRegs = 3;
635 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
636 new_inst.Src[0].Register.Negate = true;
637 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
638 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
639 tctx->emit_instruction(tctx, &new_inst);
640
641 /* MOV dst.yz, tmpA.xy */
642 new_inst = tgsi_default_full_instruction();
643 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
644 new_inst.Instruction.NumDstRegs = 1;
645 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
646 new_inst.Instruction.NumSrcRegs = 1;
647 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
648 tctx->emit_instruction(tctx, &new_inst);
649 }
650
651 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
652 /* MOV dst.xw, imm{1.0} */
653 new_inst = tgsi_default_full_instruction();
654 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
655 new_inst.Instruction.NumDstRegs = 1;
656 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
657 new_inst.Instruction.NumSrcRegs = 1;
658 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
659 tctx->emit_instruction(tctx, &new_inst);
660 }
661 }
662
663 /* EXP - Approximate Exponential Base 2
664 * dst.x = 2^{\lfloor src.x\rfloor}
665 * dst.y = src.x - \lfloor src.x\rfloor
666 * dst.z = 2^{src.x}
667 * dst.w = 1.0
668 *
669 * ; needs: 1 tmp, imm{1.0}
670 * if (lowering FLR) {
671 * FRC tmpA.x, src.x
672 * SUB tmpA.x, src.x, tmpA.x
673 * } else {
674 * FLR tmpA.x, src.x
675 * }
676 * EX2 tmpA.y, src.x
677 * SUB dst.y, src.x, tmpA.x
678 * EX2 dst.x, tmpA.x
679 * MOV dst.z, tmpA.y
680 * MOV dst.w, imm{1.0}
681 */
682 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
683 NINST(1)+ NINST(1) - OINST(1))
684 #define EXP_TMP 1
685 static void
686 transform_exp(struct tgsi_transform_context *tctx,
687 struct tgsi_full_instruction *inst)
688 {
689 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
690 struct tgsi_full_dst_register *dst = &inst->Dst[0];
691 struct tgsi_full_src_register *src = &inst->Src[0];
692 struct tgsi_full_instruction new_inst;
693
694 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
695 if (ctx->config->lower_FLR) {
696 /* FRC tmpA.x, src.x */
697 new_inst = tgsi_default_full_instruction();
698 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
699 new_inst.Instruction.NumDstRegs = 1;
700 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
701 new_inst.Instruction.NumSrcRegs = 1;
702 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
703 tctx->emit_instruction(tctx, &new_inst);
704
705 /* SUB tmpA.x, src.x, tmpA.x */
706 new_inst = tgsi_default_full_instruction();
707 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
708 new_inst.Instruction.NumDstRegs = 1;
709 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
710 new_inst.Instruction.NumSrcRegs = 2;
711 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
712 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
713 tctx->emit_instruction(tctx, &new_inst);
714 } else {
715 /* FLR tmpA.x, src.x */
716 new_inst = tgsi_default_full_instruction();
717 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
718 new_inst.Instruction.NumDstRegs = 1;
719 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
720 new_inst.Instruction.NumSrcRegs = 1;
721 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
722 tctx->emit_instruction(tctx, &new_inst);
723 }
724 }
725
726 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
727 /* EX2 tmpA.y, src.x */
728 new_inst = tgsi_default_full_instruction();
729 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
730 new_inst.Instruction.NumDstRegs = 1;
731 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
732 new_inst.Instruction.NumSrcRegs = 1;
733 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
734 tctx->emit_instruction(tctx, &new_inst);
735 }
736
737 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
738 /* SUB dst.y, src.x, tmpA.x */
739 new_inst = tgsi_default_full_instruction();
740 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
741 new_inst.Instruction.NumDstRegs = 1;
742 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
743 new_inst.Instruction.NumSrcRegs = 2;
744 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
745 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
746 tctx->emit_instruction(tctx, &new_inst);
747 }
748
749 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
750 /* EX2 dst.x, tmpA.x */
751 new_inst = tgsi_default_full_instruction();
752 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
753 new_inst.Instruction.NumDstRegs = 1;
754 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
755 new_inst.Instruction.NumSrcRegs = 1;
756 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
757 tctx->emit_instruction(tctx, &new_inst);
758 }
759
760 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
761 /* MOV dst.z, tmpA.y */
762 new_inst = tgsi_default_full_instruction();
763 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
764 new_inst.Instruction.NumDstRegs = 1;
765 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
766 new_inst.Instruction.NumSrcRegs = 1;
767 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
768 tctx->emit_instruction(tctx, &new_inst);
769 }
770
771 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
772 /* MOV dst.w, imm{1.0} */
773 new_inst = tgsi_default_full_instruction();
774 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
775 new_inst.Instruction.NumDstRegs = 1;
776 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
777 new_inst.Instruction.NumSrcRegs = 1;
778 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
779 tctx->emit_instruction(tctx, &new_inst);
780 }
781 }
782
783 /* LOG - Approximate Logarithm Base 2
784 * dst.x = \lfloor\log_2{|src.x|}\rfloor
785 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
786 * dst.z = \log_2{|src.x|}
787 * dst.w = 1.0
788 *
789 * ; needs: 1 tmp, imm{1.0}
790 * LG2 tmpA.x, |src.x|
791 * if (lowering FLR) {
792 * FRC tmpA.y, tmpA.x
793 * SUB tmpA.y, tmpA.x, tmpA.y
794 * } else {
795 * FLR tmpA.y, tmpA.x
796 * }
797 * EX2 tmpA.z, tmpA.y
798 * RCP tmpA.z, tmpA.z
799 * MUL dst.y, |src.x|, tmpA.z
800 * MOV dst.xz, tmpA.yx
801 * MOV dst.w, imm{1.0}
802 */
803 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
804 NINST(2) + NINST(1) + NINST(1) - OINST(1))
805 #define LOG_TMP 1
806 static void
807 transform_log(struct tgsi_transform_context *tctx,
808 struct tgsi_full_instruction *inst)
809 {
810 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
811 struct tgsi_full_dst_register *dst = &inst->Dst[0];
812 struct tgsi_full_src_register *src = &inst->Src[0];
813 struct tgsi_full_instruction new_inst;
814
815 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
816 /* LG2 tmpA.x, |src.x| */
817 new_inst = tgsi_default_full_instruction();
818 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
819 new_inst.Instruction.NumDstRegs = 1;
820 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
821 new_inst.Instruction.NumSrcRegs = 1;
822 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
823 new_inst.Src[0].Register.Absolute = true;
824 tctx->emit_instruction(tctx, &new_inst);
825 }
826
827 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
828 if (ctx->config->lower_FLR) {
829 /* FRC tmpA.y, tmpA.x */
830 new_inst = tgsi_default_full_instruction();
831 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
832 new_inst.Instruction.NumDstRegs = 1;
833 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
834 new_inst.Instruction.NumSrcRegs = 1;
835 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
836 tctx->emit_instruction(tctx, &new_inst);
837
838 /* SUB tmpA.y, tmpA.x, tmpA.y */
839 new_inst = tgsi_default_full_instruction();
840 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
841 new_inst.Instruction.NumDstRegs = 1;
842 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
843 new_inst.Instruction.NumSrcRegs = 2;
844 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
845 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
846 tctx->emit_instruction(tctx, &new_inst);
847 } else {
848 /* FLR tmpA.y, tmpA.x */
849 new_inst = tgsi_default_full_instruction();
850 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
851 new_inst.Instruction.NumDstRegs = 1;
852 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
853 new_inst.Instruction.NumSrcRegs = 1;
854 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
855 tctx->emit_instruction(tctx, &new_inst);
856 }
857 }
858
859 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
860 /* EX2 tmpA.z, tmpA.y */
861 new_inst = tgsi_default_full_instruction();
862 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
863 new_inst.Instruction.NumDstRegs = 1;
864 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
865 new_inst.Instruction.NumSrcRegs = 1;
866 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
867 tctx->emit_instruction(tctx, &new_inst);
868
869 /* RCP tmpA.z, tmpA.z */
870 new_inst = tgsi_default_full_instruction();
871 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
872 new_inst.Instruction.NumDstRegs = 1;
873 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
874 new_inst.Instruction.NumSrcRegs = 1;
875 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
876 tctx->emit_instruction(tctx, &new_inst);
877
878 /* MUL dst.y, |src.x|, tmpA.z */
879 new_inst = tgsi_default_full_instruction();
880 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
881 new_inst.Instruction.NumDstRegs = 1;
882 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
883 new_inst.Instruction.NumSrcRegs = 2;
884 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
885 new_inst.Src[0].Register.Absolute = true;
886 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
887 tctx->emit_instruction(tctx, &new_inst);
888 }
889
890 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
891 /* MOV dst.xz, tmpA.yx */
892 new_inst = tgsi_default_full_instruction();
893 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
894 new_inst.Instruction.NumDstRegs = 1;
895 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
896 new_inst.Instruction.NumSrcRegs = 1;
897 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
898 tctx->emit_instruction(tctx, &new_inst);
899 }
900
901 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
902 /* MOV dst.w, imm{1.0} */
903 new_inst = tgsi_default_full_instruction();
904 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
905 new_inst.Instruction.NumDstRegs = 1;
906 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
907 new_inst.Instruction.NumSrcRegs = 1;
908 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
909 tctx->emit_instruction(tctx, &new_inst);
910 }
911 }
912
913 /* DP4 - 4-component Dot Product
914 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
915 *
916 * DP3 - 3-component Dot Product
917 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
918 *
919 * DPH - Homogeneous Dot Product
920 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
921 *
922 * DP2 - 2-component Dot Product
923 * dst = src0.x \times src1.x + src0.y \times src1.y
924 *
925 * DP2A - 2-component Dot Product And Add
926 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
927 *
928 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
929 * operations, which is what you'd prefer for a ISA that is natively
930 * scalar. Probably a native vector ISA would at least already have
931 * DP4/DP3 instructions, but perhaps there is room for an alternative
932 * translation for DPH/DP2/DP2A using vector instructions.
933 *
934 * ; needs: 1 tmp
935 * MUL tmpA.x, src0.x, src1.x
936 * MAD tmpA.x, src0.y, src1.y, tmpA.x
937 * if (DPH || DP3 || DP4) {
938 * MAD tmpA.x, src0.z, src1.z, tmpA.x
939 * if (DPH) {
940 * ADD tmpA.x, src1.w, tmpA.x
941 * } else if (DP4) {
942 * MAD tmpA.x, src0.w, src1.w, tmpA.x
943 * }
944 * } else if (DP2A) {
945 * ADD tmpA.x, src2.x, tmpA.x
946 * }
947 * ; fixup last instruction to replicate into dst
948 */
949 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
950 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
951 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
952 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
953 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
954 #define DOTP_TMP 1
955 static void
956 transform_dotp(struct tgsi_transform_context *tctx,
957 struct tgsi_full_instruction *inst)
958 {
959 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
960 struct tgsi_full_dst_register *dst = &inst->Dst[0];
961 struct tgsi_full_src_register *src0 = &inst->Src[0];
962 struct tgsi_full_src_register *src1 = &inst->Src[1];
963 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
964 struct tgsi_full_instruction new_inst;
965 unsigned opcode = inst->Instruction.Opcode;
966
967 /* NOTE: any potential last instruction must replicate src on all
968 * components (since it could be re-written to write to final dst)
969 */
970
971 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
972 /* MUL tmpA.x, src0.x, src1.x */
973 new_inst = tgsi_default_full_instruction();
974 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
975 new_inst.Instruction.NumDstRegs = 1;
976 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
977 new_inst.Instruction.NumSrcRegs = 2;
978 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
979 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
980 tctx->emit_instruction(tctx, &new_inst);
981
982 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
983 new_inst = tgsi_default_full_instruction();
984 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
985 new_inst.Instruction.NumDstRegs = 1;
986 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
987 new_inst.Instruction.NumSrcRegs = 3;
988 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
989 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
990 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
991
992 if ((opcode == TGSI_OPCODE_DPH) ||
993 (opcode == TGSI_OPCODE_DP3) ||
994 (opcode == TGSI_OPCODE_DP4)) {
995 tctx->emit_instruction(tctx, &new_inst);
996
997 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
998 new_inst = tgsi_default_full_instruction();
999 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1000 new_inst.Instruction.NumDstRegs = 1;
1001 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1002 new_inst.Instruction.NumSrcRegs = 3;
1003 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
1004 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
1005 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1006
1007 if (opcode == TGSI_OPCODE_DPH) {
1008 tctx->emit_instruction(tctx, &new_inst);
1009
1010 /* ADD tmpA.x, src1.w, tmpA.x */
1011 new_inst = tgsi_default_full_instruction();
1012 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1013 new_inst.Instruction.NumDstRegs = 1;
1014 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1015 new_inst.Instruction.NumSrcRegs = 2;
1016 reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
1017 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1018 } else if (opcode == TGSI_OPCODE_DP4) {
1019 tctx->emit_instruction(tctx, &new_inst);
1020
1021 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
1022 new_inst = tgsi_default_full_instruction();
1023 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1024 new_inst.Instruction.NumDstRegs = 1;
1025 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1026 new_inst.Instruction.NumSrcRegs = 3;
1027 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
1028 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
1029 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1030 }
1031 } else if (opcode == TGSI_OPCODE_DP2A) {
1032 tctx->emit_instruction(tctx, &new_inst);
1033
1034 /* ADD tmpA.x, src2.x, tmpA.x */
1035 new_inst = tgsi_default_full_instruction();
1036 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1037 new_inst.Instruction.NumDstRegs = 1;
1038 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1039 new_inst.Instruction.NumSrcRegs = 2;
1040 reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
1041 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1042 }
1043
1044 /* fixup last instruction to write to dst: */
1045 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1046
1047 tctx->emit_instruction(tctx, &new_inst);
1048 }
1049 }
1050
1051 /* FLR - floor, CEIL - ceil
1052 * ; needs: 1 tmp
1053 * if (CEIL) {
1054 * FRC tmpA, -src
1055 * ADD dst, src, tmpA
1056 * } else {
1057 * FRC tmpA, src
1058 * SUB dst, src, tmpA
1059 * }
1060 */
1061 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
1062 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
1063 #define FLR_TMP 1
1064 #define CEIL_TMP 1
1065 static void
1066 transform_flr_ceil(struct tgsi_transform_context *tctx,
1067 struct tgsi_full_instruction *inst)
1068 {
1069 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1070 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1071 struct tgsi_full_src_register *src0 = &inst->Src[0];
1072 struct tgsi_full_instruction new_inst;
1073 unsigned opcode = inst->Instruction.Opcode;
1074
1075 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1076 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
1077 new_inst = tgsi_default_full_instruction();
1078 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1079 new_inst.Instruction.NumDstRegs = 1;
1080 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1081 new_inst.Instruction.NumSrcRegs = 1;
1082 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1083
1084 if (opcode == TGSI_OPCODE_CEIL)
1085 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
1086 tctx->emit_instruction(tctx, &new_inst);
1087
1088 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
1089 new_inst = tgsi_default_full_instruction();
1090 if (opcode == TGSI_OPCODE_CEIL)
1091 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1092 else
1093 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
1094 new_inst.Instruction.NumDstRegs = 1;
1095 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1096 new_inst.Instruction.NumSrcRegs = 2;
1097 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1098 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1099 tctx->emit_instruction(tctx, &new_inst);
1100 }
1101 }
1102
1103 /* TRUNC - truncate off fractional part
1104 * dst.x = trunc(src.x)
1105 * dst.y = trunc(src.y)
1106 * dst.z = trunc(src.z)
1107 * dst.w = trunc(src.w)
1108 *
1109 * ; needs: 1 tmp
1110 * if (lower FLR) {
1111 * FRC tmpA, |src|
1112 * SUB tmpA, |src|, tmpA
1113 * } else {
1114 * FLR tmpA, |src|
1115 * }
1116 * CMP dst, src, -tmpA, tmpA
1117 */
1118 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
1119 #define TRUNC_TMP 1
1120 static void
1121 transform_trunc(struct tgsi_transform_context *tctx,
1122 struct tgsi_full_instruction *inst)
1123 {
1124 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1125 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1126 struct tgsi_full_src_register *src0 = &inst->Src[0];
1127 struct tgsi_full_instruction new_inst;
1128
1129 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1130 if (ctx->config->lower_FLR) {
1131 new_inst = tgsi_default_full_instruction();
1132 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1133 new_inst.Instruction.NumDstRegs = 1;
1134 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1135 new_inst.Instruction.NumSrcRegs = 1;
1136 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1137 new_inst.Src[0].Register.Absolute = true;
1138 new_inst.Src[0].Register.Negate = false;
1139 tctx->emit_instruction(tctx, &new_inst);
1140
1141 new_inst = tgsi_default_full_instruction();
1142 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
1143 new_inst.Instruction.NumDstRegs = 1;
1144 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1145 new_inst.Instruction.NumSrcRegs = 2;
1146 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1147 new_inst.Src[0].Register.Absolute = true;
1148 new_inst.Src[0].Register.Negate = false;
1149 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1150 tctx->emit_instruction(tctx, &new_inst);
1151 } else {
1152 new_inst = tgsi_default_full_instruction();
1153 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
1154 new_inst.Instruction.NumDstRegs = 1;
1155 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1156 new_inst.Instruction.NumSrcRegs = 1;
1157 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1158 new_inst.Src[0].Register.Absolute = true;
1159 new_inst.Src[0].Register.Negate = false;
1160 tctx->emit_instruction(tctx, &new_inst);
1161 }
1162
1163 new_inst = tgsi_default_full_instruction();
1164 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1165 new_inst.Instruction.NumDstRegs = 1;
1166 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1167 new_inst.Instruction.NumSrcRegs = 3;
1168 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1169 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1170 new_inst.Src[1].Register.Negate = true;
1171 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1172 tctx->emit_instruction(tctx, &new_inst);
1173 }
1174 }
1175
1176 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1177 * in the case of TXP, the clamping must happen *after* projection, so
1178 * we need to lower TXP to TEX.
1179 *
1180 * MOV tmpA, src0
1181 * if (opc == TXP) {
1182 * ; do perspective division manually before clamping:
1183 * RCP tmpB, tmpA.w
1184 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1185 * opc = TEX;
1186 * }
1187 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1188 * <opc> dst, tmpA, ...
1189 */
1190 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1191 #define SAMP_TMP 2
1192 static int
1193 transform_samp(struct tgsi_transform_context *tctx,
1194 struct tgsi_full_instruction *inst)
1195 {
1196 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1197 struct tgsi_full_src_register *coord = &inst->Src[0];
1198 struct tgsi_full_src_register *samp;
1199 struct tgsi_full_instruction new_inst;
1200 /* mask is clamped coords, pmask is all coords (for projection): */
1201 unsigned mask = 0, pmask = 0, smask;
1202 unsigned tex = inst->Texture.Texture;
1203 unsigned opcode = inst->Instruction.Opcode;
1204 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1205 (ctx->config->lower_TXP & (1 << tex));
1206
1207 if (opcode == TGSI_OPCODE_TXB2) {
1208 samp = &inst->Src[2];
1209 } else {
1210 samp = &inst->Src[1];
1211 }
1212
1213 /* convert sampler # to bitmask to test: */
1214 smask = 1 << samp->Register.Index;
1215
1216 /* check if we actually need to lower this one: */
1217 if (!(ctx->saturate & smask) && !lower_txp)
1218 return -1;
1219
1220 /* figure out which coordinates need saturating:
1221 * - RECT textures should not get saturated
1222 * - array index coords should not get saturated
1223 */
1224 switch (tex) {
1225 case TGSI_TEXTURE_3D:
1226 case TGSI_TEXTURE_CUBE:
1227 case TGSI_TEXTURE_CUBE_ARRAY:
1228 case TGSI_TEXTURE_SHADOWCUBE:
1229 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1230 if (ctx->config->saturate_r & smask)
1231 mask |= TGSI_WRITEMASK_Z;
1232 pmask |= TGSI_WRITEMASK_Z;
1233 /* fallthrough */
1234
1235 case TGSI_TEXTURE_2D:
1236 case TGSI_TEXTURE_2D_ARRAY:
1237 case TGSI_TEXTURE_SHADOW2D:
1238 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1239 case TGSI_TEXTURE_2D_MSAA:
1240 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1241 if (ctx->config->saturate_t & smask)
1242 mask |= TGSI_WRITEMASK_Y;
1243 pmask |= TGSI_WRITEMASK_Y;
1244 /* fallthrough */
1245
1246 case TGSI_TEXTURE_1D:
1247 case TGSI_TEXTURE_1D_ARRAY:
1248 case TGSI_TEXTURE_SHADOW1D:
1249 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1250 if (ctx->config->saturate_s & smask)
1251 mask |= TGSI_WRITEMASK_X;
1252 pmask |= TGSI_WRITEMASK_X;
1253 break;
1254
1255 case TGSI_TEXTURE_RECT:
1256 case TGSI_TEXTURE_SHADOWRECT:
1257 /* we don't saturate, but in case of lower_txp we
1258 * still need to do the perspective divide:
1259 */
1260 pmask = TGSI_WRITEMASK_XY;
1261 break;
1262 }
1263
1264 /* sanity check.. driver could be asking to saturate a non-
1265 * existent coordinate component:
1266 */
1267 if (!mask && !lower_txp)
1268 return -1;
1269
1270 /* MOV tmpA, src0 */
1271 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1272
1273 /* This is a bit sad.. we need to clamp *after* the coords
1274 * are projected, which means lowering TXP to TEX and doing
1275 * the projection ourself. But since I haven't figured out
1276 * how to make the lowering code deliver an electric shock
1277 * to anyone using GL_CLAMP, we must do this instead:
1278 */
1279 if (opcode == TGSI_OPCODE_TXP) {
1280 /* RCP tmpB.x tmpA.w */
1281 new_inst = tgsi_default_full_instruction();
1282 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1283 new_inst.Instruction.NumDstRegs = 1;
1284 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1285 new_inst.Instruction.NumSrcRegs = 1;
1286 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1287 tctx->emit_instruction(tctx, &new_inst);
1288
1289 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1290 new_inst = tgsi_default_full_instruction();
1291 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1292 new_inst.Instruction.NumDstRegs = 1;
1293 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1294 new_inst.Instruction.NumSrcRegs = 2;
1295 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1296 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1297 tctx->emit_instruction(tctx, &new_inst);
1298
1299 opcode = TGSI_OPCODE_TEX;
1300 }
1301
1302 /* MOV_SAT tmpA.<mask>, tmpA */
1303 if (mask) {
1304 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1305 }
1306
1307 /* modify the texture samp instruction to take fixed up coord: */
1308 new_inst = *inst;
1309 new_inst.Instruction.Opcode = opcode;
1310 new_inst.Src[0] = ctx->tmp[A].src;
1311 tctx->emit_instruction(tctx, &new_inst);
1312
1313 return 0;
1314 }
1315
1316 /* Two-sided color emulation:
1317 * For each COLOR input, create a corresponding BCOLOR input, plus
1318 * CMP instruction to select front or back color based on FACE
1319 */
1320 #define TWOSIDE_GROW(n) ( \
1321 2 + /* FACE */ \
1322 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1323 ((n) * 1) + /* TEMP[] */ \
1324 ((n) * NINST(3)) /* CMP instr */ \
1325 )
1326
1327 static void
1328 emit_twoside(struct tgsi_transform_context *tctx)
1329 {
1330 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1331 struct tgsi_shader_info *info = ctx->info;
1332 struct tgsi_full_declaration decl;
1333 struct tgsi_full_instruction new_inst;
1334 unsigned inbase, tmpbase;
1335 int i;
1336
1337 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1338 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1339
1340 /* additional inputs for BCOLOR's */
1341 for (i = 0; i < ctx->two_side_colors; i++) {
1342 unsigned in_idx = ctx->two_side_idx[i];
1343 decl = tgsi_default_full_declaration();
1344 decl.Declaration.File = TGSI_FILE_INPUT;
1345 decl.Declaration.Semantic = true;
1346 decl.Range.First = decl.Range.Last = inbase + i;
1347 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1348 decl.Semantic.Index = info->input_semantic_index[in_idx];
1349 decl.Declaration.Interpolate = true;
1350 decl.Interp.Interpolate = info->input_interpolate[in_idx];
1351 decl.Interp.Location = info->input_interpolate_loc[in_idx];
1352 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1353 tctx->emit_declaration(tctx, &decl);
1354 }
1355
1356 /* additional input for FACE */
1357 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1358 decl = tgsi_default_full_declaration();
1359 decl.Declaration.File = TGSI_FILE_INPUT;
1360 decl.Declaration.Semantic = true;
1361 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1362 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1363 decl.Semantic.Index = 0;
1364 tctx->emit_declaration(tctx, &decl);
1365
1366 ctx->face_idx = decl.Range.First;
1367 }
1368
1369 /* additional temps for COLOR/BCOLOR selection: */
1370 for (i = 0; i < ctx->two_side_colors; i++) {
1371 decl = tgsi_default_full_declaration();
1372 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1373 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1374 tctx->emit_declaration(tctx, &decl);
1375 }
1376
1377 /* and finally additional instructions to select COLOR/BCOLOR: */
1378 for (i = 0; i < ctx->two_side_colors; i++) {
1379 new_inst = tgsi_default_full_instruction();
1380 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1381
1382 new_inst.Instruction.NumDstRegs = 1;
1383 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1384 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1385 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1386
1387 new_inst.Instruction.NumSrcRegs = 3;
1388 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1389 new_inst.Src[0].Register.Index = ctx->face_idx;
1390 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1391 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1392 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1393 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1394 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1395 new_inst.Src[1].Register.Index = inbase + i;
1396 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1397 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1398 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1399 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1400 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1401 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1402 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1403 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1404 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1405 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1406
1407 tctx->emit_instruction(tctx, &new_inst);
1408 }
1409 }
1410
1411 static void
1412 emit_decls(struct tgsi_transform_context *tctx)
1413 {
1414 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1415 struct tgsi_shader_info *info = ctx->info;
1416 struct tgsi_full_declaration decl;
1417 struct tgsi_full_immediate immed;
1418 unsigned tmpbase;
1419 int i;
1420
1421 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1422
1423 ctx->color_base = tmpbase + ctx->numtmp;
1424
1425 /* declare immediate: */
1426 immed = tgsi_default_full_immediate();
1427 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1428 immed.u[0].Float = 0.0;
1429 immed.u[1].Float = 1.0;
1430 immed.u[2].Float = 128.0;
1431 immed.u[3].Float = 0.0;
1432 tctx->emit_immediate(tctx, &immed);
1433
1434 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1435 ctx->imm.Register.Index = info->immediate_count;
1436 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1437 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1438 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1439 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1440
1441 /* declare temp regs: */
1442 for (i = 0; i < ctx->numtmp; i++) {
1443 decl = tgsi_default_full_declaration();
1444 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1445 decl.Range.First = decl.Range.Last = tmpbase + i;
1446 tctx->emit_declaration(tctx, &decl);
1447
1448 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1449 ctx->tmp[i].src.Register.Index = tmpbase + i;
1450 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1451 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1452 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1453 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1454
1455 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1456 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1457 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1458 }
1459
1460 if (ctx->two_side_colors)
1461 emit_twoside(tctx);
1462 }
1463
1464 static void
1465 rename_color_inputs(struct tgsi_lowering_context *ctx,
1466 struct tgsi_full_instruction *inst)
1467 {
1468 unsigned i, j;
1469 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1470 struct tgsi_src_register *src = &inst->Src[i].Register;
1471 if (src->File == TGSI_FILE_INPUT) {
1472 for (j = 0; j < ctx->two_side_colors; j++) {
1473 if (src->Index == ctx->two_side_idx[j]) {
1474 src->File = TGSI_FILE_TEMPORARY;
1475 src->Index = ctx->color_base + j;
1476 break;
1477 }
1478 }
1479 }
1480 }
1481
1482 }
1483
1484 static void
1485 transform_instr(struct tgsi_transform_context *tctx,
1486 struct tgsi_full_instruction *inst)
1487 {
1488 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1489
1490 if (!ctx->emitted_decls) {
1491 emit_decls(tctx);
1492 ctx->emitted_decls = 1;
1493 }
1494
1495 /* if emulating two-sided-color, we need to re-write some
1496 * src registers:
1497 */
1498 if (ctx->two_side_colors)
1499 rename_color_inputs(ctx, inst);
1500
1501 switch (inst->Instruction.Opcode) {
1502 case TGSI_OPCODE_DST:
1503 if (!ctx->config->lower_DST)
1504 goto skip;
1505 transform_dst(tctx, inst);
1506 break;
1507 case TGSI_OPCODE_XPD:
1508 if (!ctx->config->lower_XPD)
1509 goto skip;
1510 transform_xpd(tctx, inst);
1511 break;
1512 case TGSI_OPCODE_SCS:
1513 if (!ctx->config->lower_SCS)
1514 goto skip;
1515 transform_scs(tctx, inst);
1516 break;
1517 case TGSI_OPCODE_LRP:
1518 if (!ctx->config->lower_LRP)
1519 goto skip;
1520 transform_lrp(tctx, inst);
1521 break;
1522 case TGSI_OPCODE_FRC:
1523 if (!ctx->config->lower_FRC)
1524 goto skip;
1525 transform_frc(tctx, inst);
1526 break;
1527 case TGSI_OPCODE_POW:
1528 if (!ctx->config->lower_POW)
1529 goto skip;
1530 transform_pow(tctx, inst);
1531 break;
1532 case TGSI_OPCODE_LIT:
1533 if (!ctx->config->lower_LIT)
1534 goto skip;
1535 transform_lit(tctx, inst);
1536 break;
1537 case TGSI_OPCODE_EXP:
1538 if (!ctx->config->lower_EXP)
1539 goto skip;
1540 transform_exp(tctx, inst);
1541 break;
1542 case TGSI_OPCODE_LOG:
1543 if (!ctx->config->lower_LOG)
1544 goto skip;
1545 transform_log(tctx, inst);
1546 break;
1547 case TGSI_OPCODE_DP4:
1548 if (!ctx->config->lower_DP4)
1549 goto skip;
1550 transform_dotp(tctx, inst);
1551 break;
1552 case TGSI_OPCODE_DP3:
1553 if (!ctx->config->lower_DP3)
1554 goto skip;
1555 transform_dotp(tctx, inst);
1556 break;
1557 case TGSI_OPCODE_DPH:
1558 if (!ctx->config->lower_DPH)
1559 goto skip;
1560 transform_dotp(tctx, inst);
1561 break;
1562 case TGSI_OPCODE_DP2:
1563 if (!ctx->config->lower_DP2)
1564 goto skip;
1565 transform_dotp(tctx, inst);
1566 break;
1567 case TGSI_OPCODE_DP2A:
1568 if (!ctx->config->lower_DP2A)
1569 goto skip;
1570 transform_dotp(tctx, inst);
1571 break;
1572 case TGSI_OPCODE_FLR:
1573 if (!ctx->config->lower_FLR)
1574 goto skip;
1575 transform_flr_ceil(tctx, inst);
1576 break;
1577 case TGSI_OPCODE_CEIL:
1578 if (!ctx->config->lower_CEIL)
1579 goto skip;
1580 transform_flr_ceil(tctx, inst);
1581 break;
1582 case TGSI_OPCODE_TRUNC:
1583 if (!ctx->config->lower_TRUNC)
1584 goto skip;
1585 transform_trunc(tctx, inst);
1586 break;
1587 case TGSI_OPCODE_TEX:
1588 case TGSI_OPCODE_TXP:
1589 case TGSI_OPCODE_TXB:
1590 case TGSI_OPCODE_TXB2:
1591 case TGSI_OPCODE_TXL:
1592 if (transform_samp(tctx, inst))
1593 goto skip;
1594 break;
1595 default:
1596 skip:
1597 tctx->emit_instruction(tctx, inst);
1598 break;
1599 }
1600 }
1601
1602 /* returns NULL if no lowering required, else returns the new
1603 * tokens (which caller is required to free()). In either case
1604 * returns the current info.
1605 */
1606 const struct tgsi_token *
1607 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1608 const struct tgsi_token *tokens,
1609 struct tgsi_shader_info *info)
1610 {
1611 struct tgsi_lowering_context ctx;
1612 struct tgsi_token *newtoks;
1613 int newlen, numtmp;
1614
1615 /* sanity check in case limit is ever increased: */
1616 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1617
1618 /* sanity check the lowering */
1619 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1620 assert(!(config->lower_FRC && config->lower_TRUNC));
1621
1622 memset(&ctx, 0, sizeof(ctx));
1623 ctx.base.transform_instruction = transform_instr;
1624 ctx.info = info;
1625 ctx.config = config;
1626
1627 tgsi_scan_shader(tokens, info);
1628
1629 /* if we are adding fragment shader support to emulate two-sided
1630 * color, then figure out the number of additional inputs we need
1631 * to create for BCOLOR's..
1632 */
1633 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1634 config->color_two_side) {
1635 int i;
1636 ctx.face_idx = -1;
1637 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1638 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1639 ctx.two_side_idx[ctx.two_side_colors++] = i;
1640 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1641 ctx.face_idx = i;
1642 }
1643 }
1644
1645 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1646
1647 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1648 /* if there are no instructions to lower, then we are done: */
1649 if (!(OPCS(DST) ||
1650 OPCS(XPD) ||
1651 OPCS(SCS) ||
1652 OPCS(LRP) ||
1653 OPCS(FRC) ||
1654 OPCS(POW) ||
1655 OPCS(LIT) ||
1656 OPCS(EXP) ||
1657 OPCS(LOG) ||
1658 OPCS(DP4) ||
1659 OPCS(DP3) ||
1660 OPCS(DPH) ||
1661 OPCS(DP2) ||
1662 OPCS(DP2A) ||
1663 OPCS(FLR) ||
1664 OPCS(CEIL) ||
1665 OPCS(TRUNC) ||
1666 OPCS(TXP) ||
1667 ctx.two_side_colors ||
1668 ctx.saturate))
1669 return NULL;
1670
1671 #if 0 /* debug */
1672 _debug_printf("BEFORE:");
1673 tgsi_dump(tokens, 0);
1674 #endif
1675
1676 numtmp = 0;
1677 newlen = tgsi_num_tokens(tokens);
1678 if (OPCS(DST)) {
1679 newlen += DST_GROW * OPCS(DST);
1680 numtmp = MAX2(numtmp, DST_TMP);
1681 }
1682 if (OPCS(XPD)) {
1683 newlen += XPD_GROW * OPCS(XPD);
1684 numtmp = MAX2(numtmp, XPD_TMP);
1685 }
1686 if (OPCS(SCS)) {
1687 newlen += SCS_GROW * OPCS(SCS);
1688 numtmp = MAX2(numtmp, SCS_TMP);
1689 }
1690 if (OPCS(LRP)) {
1691 newlen += LRP_GROW * OPCS(LRP);
1692 numtmp = MAX2(numtmp, LRP_TMP);
1693 }
1694 if (OPCS(FRC)) {
1695 newlen += FRC_GROW * OPCS(FRC);
1696 numtmp = MAX2(numtmp, FRC_TMP);
1697 }
1698 if (OPCS(POW)) {
1699 newlen += POW_GROW * OPCS(POW);
1700 numtmp = MAX2(numtmp, POW_TMP);
1701 }
1702 if (OPCS(LIT)) {
1703 newlen += LIT_GROW * OPCS(LIT);
1704 numtmp = MAX2(numtmp, LIT_TMP);
1705 }
1706 if (OPCS(EXP)) {
1707 newlen += EXP_GROW * OPCS(EXP);
1708 numtmp = MAX2(numtmp, EXP_TMP);
1709 }
1710 if (OPCS(LOG)) {
1711 newlen += LOG_GROW * OPCS(LOG);
1712 numtmp = MAX2(numtmp, LOG_TMP);
1713 }
1714 if (OPCS(DP4)) {
1715 newlen += DP4_GROW * OPCS(DP4);
1716 numtmp = MAX2(numtmp, DOTP_TMP);
1717 }
1718 if (OPCS(DP3)) {
1719 newlen += DP3_GROW * OPCS(DP3);
1720 numtmp = MAX2(numtmp, DOTP_TMP);
1721 }
1722 if (OPCS(DPH)) {
1723 newlen += DPH_GROW * OPCS(DPH);
1724 numtmp = MAX2(numtmp, DOTP_TMP);
1725 }
1726 if (OPCS(DP2)) {
1727 newlen += DP2_GROW * OPCS(DP2);
1728 numtmp = MAX2(numtmp, DOTP_TMP);
1729 }
1730 if (OPCS(DP2A)) {
1731 newlen += DP2A_GROW * OPCS(DP2A);
1732 numtmp = MAX2(numtmp, DOTP_TMP);
1733 }
1734 if (OPCS(FLR)) {
1735 newlen += FLR_GROW * OPCS(FLR);
1736 numtmp = MAX2(numtmp, FLR_TMP);
1737 }
1738 if (OPCS(CEIL)) {
1739 newlen += CEIL_GROW * OPCS(CEIL);
1740 numtmp = MAX2(numtmp, CEIL_TMP);
1741 }
1742 if (OPCS(TRUNC)) {
1743 newlen += TRUNC_GROW * OPCS(TRUNC);
1744 numtmp = MAX2(numtmp, TRUNC_TMP);
1745 }
1746 if (ctx.saturate || config->lower_TXP) {
1747 int n = 0;
1748
1749 if (ctx.saturate) {
1750 n = info->opcode_count[TGSI_OPCODE_TEX] +
1751 info->opcode_count[TGSI_OPCODE_TXP] +
1752 info->opcode_count[TGSI_OPCODE_TXB] +
1753 info->opcode_count[TGSI_OPCODE_TXB2] +
1754 info->opcode_count[TGSI_OPCODE_TXL];
1755 } else if (config->lower_TXP) {
1756 n = info->opcode_count[TGSI_OPCODE_TXP];
1757 }
1758
1759 newlen += SAMP_GROW * n;
1760 numtmp = MAX2(numtmp, SAMP_TMP);
1761 }
1762
1763 /* specifically don't include two_side_colors temps in the count: */
1764 ctx.numtmp = numtmp;
1765
1766 if (ctx.two_side_colors) {
1767 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1768 /* note: we permanently consume temp regs, re-writing references
1769 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1770 * instruction that selects which varying to use):
1771 */
1772 numtmp += ctx.two_side_colors;
1773 }
1774
1775 newlen += 2 * numtmp;
1776 newlen += 5; /* immediate */
1777
1778 newtoks = tgsi_alloc_tokens(newlen);
1779 if (!newtoks)
1780 return NULL;
1781
1782 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1783
1784 tgsi_scan_shader(newtoks, info);
1785
1786 #if 0 /* debug */
1787 _debug_printf("AFTER:");
1788 tgsi_dump(newtoks, 0);
1789 #endif
1790
1791 return newtoks;
1792 }