gallium: add TGSI_PROPERTY_MUL_ZERO_WINS
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33
34 #include "tgsi_lowering.h"
35
36 struct tgsi_lowering_context {
37 struct tgsi_transform_context base;
38 const struct tgsi_lowering_config *config;
39 struct tgsi_shader_info *info;
40 unsigned two_side_colors;
41 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
43 int face_idx;
44 unsigned numtmp;
45 struct {
46 struct tgsi_full_src_register src;
47 struct tgsi_full_dst_register dst;
48 } tmp[2];
49 #define A 0
50 #define B 1
51 struct tgsi_full_src_register imm;
52 int emitted_decls;
53 unsigned saturate;
54 };
55
/* Downcast a transform context to the lowering context that embeds it. */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lowering =
      (struct tgsi_lowering_context *)tctx;

   return lowering;
}
61
62 /*
63 * Utility helpers:
64 */
65
66 static void
67 reg_dst(struct tgsi_full_dst_register *dst,
68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70 *dst = *orig_dst;
71 dst->Register.WriteMask &= wrmask;
72 assert(dst->Register.WriteMask);
73 }
74
75 static inline void
76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78 swiz[0] = src->SwizzleX;
79 swiz[1] = src->SwizzleY;
80 swiz[2] = src->SwizzleZ;
81 swiz[3] = src->SwizzleW;
82 }
83
84 static void
85 reg_src(struct tgsi_full_src_register *src,
86 const struct tgsi_full_src_register *orig_src,
87 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89 unsigned swiz[4];
90 get_swiz(swiz, &orig_src->Register);
91 *src = *orig_src;
92 src->Register.SwizzleX = swiz[sx];
93 src->Register.SwizzleY = swiz[sy];
94 src->Register.SwizzleZ = swiz[sz];
95 src->Register.SwizzleW = swiz[sw];
96 }
97
/*
 * SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* values, in
 * the order expected by the trailing arguments of reg_src().  Pass '_' for
 * a channel whose value does not matter; it maps to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101
102 /*
103 * if (dst.x aliases src.x) {
104 * MOV tmpA.x, src.x
105 * src = tmpA
106 * }
107 * COS dst.x, src.x
108 * SIN dst.y, src.x
109 * MOV dst.zw, imm{0.0, 1.0}
110 */
111 static bool
112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115 if ((dst->Register.File == src->Register.File) &&
116 (dst->Register.Index == src->Register.Index)) {
117 unsigned i, actual_mask = 0;
118 unsigned swiz[4];
119 get_swiz(swiz, &src->Register);
120 for (i = 0; i < 4; i++)
121 if (src_mask & (1 << i))
122 actual_mask |= (1 << swiz[i]);
123 if (actual_mask & dst_mask)
124 return true;
125 }
126 return false;
127 }
128
129 static void
130 create_mov(struct tgsi_transform_context *tctx,
131 const struct tgsi_full_dst_register *dst,
132 const struct tgsi_full_src_register *src,
133 unsigned mask, unsigned saturate)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.Saturate = saturate;
140 new_inst.Instruction.NumDstRegs = 1;
141 reg_dst(&new_inst.Dst[0], dst, mask);
142 new_inst.Instruction.NumSrcRegs = 1;
143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144 tctx->emit_instruction(tctx, &new_inst);
145 }
146
/* Helpers to calculate the # of tgsi tokens for a lowering.  We assume
 * the worst case, i.e. that removed instructions don't have ADDR[] or
 * anything else which increases the # of tokens per src/dst, and that
 * the inserted instructions do.
 *
 * OINST() - old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST() - new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs) (1 + 1 + 1 * (nargs))
#define NINST(nargs) (1 + 2 + 2 * (nargs))
165
166 /*
167 * Lowering Translators:
168 */
169
170 /* DST - Distance Vector
171 * dst.x = 1.0
172 * dst.y = src0.y \times src1.y
173 * dst.z = src0.z
174 * dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
181 * src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
185 * src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP 2
195 static void
196 transform_dst(struct tgsi_transform_context *tctx,
197 struct tgsi_full_instruction *inst)
198 {
199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200 struct tgsi_full_dst_register *dst = &inst->Dst[0];
201 struct tgsi_full_src_register *src0 = &inst->Src[0];
202 struct tgsi_full_src_register *src1 = &inst->Src[1];
203 struct tgsi_full_instruction new_inst;
204
205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207 src0 = &ctx->tmp[A].src;
208 }
209
210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212 src1 = &ctx->tmp[B].src;
213 }
214
215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst = tgsi_default_full_instruction();
218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219 new_inst.Instruction.NumDstRegs = 1;
220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221 new_inst.Instruction.NumSrcRegs = 2;
222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224 tctx->emit_instruction(tctx, &new_inst);
225 }
226
227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228 /* MOV dst.z, src0.z */
229 new_inst = tgsi_default_full_instruction();
230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231 new_inst.Instruction.NumDstRegs = 1;
232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233 new_inst.Instruction.NumSrcRegs = 1;
234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235 tctx->emit_instruction(tctx, &new_inst);
236 }
237
238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239 /* MOV dst.w, src1.w */
240 new_inst = tgsi_default_full_instruction();
241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242 new_inst.Instruction.NumDstRegs = 1;
243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244 new_inst.Instruction.NumSrcRegs = 1;
245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246 tctx->emit_instruction(tctx, &new_inst);
247 }
248
249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst = tgsi_default_full_instruction();
252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253 new_inst.Instruction.NumDstRegs = 1;
254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255 new_inst.Instruction.NumSrcRegs = 1;
256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257 tctx->emit_instruction(tctx, &new_inst);
258 }
259 }
260
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
265 * dst.w = 1.0
266 *
267 * ; needs: 1 tmp, imm{1.0}
268 * MUL tmpA.xyz, src1.yzx, src0.zxy
269 * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz
270 * MOV dst.w, imm{1.0}
271 */
272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2))
273 #define XPD_TMP 1
274 static void
275 transform_xpd(struct tgsi_transform_context *tctx,
276 struct tgsi_full_instruction *inst)
277 {
278 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
279 struct tgsi_full_dst_register *dst = &inst->Dst[0];
280 struct tgsi_full_src_register *src0 = &inst->Src[0];
281 struct tgsi_full_src_register *src1 = &inst->Src[1];
282 struct tgsi_full_instruction new_inst;
283
284 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
285 /* MUL tmpA.xyz, src1.yzx, src0.zxy */
286 new_inst = tgsi_default_full_instruction();
287 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
288 new_inst.Instruction.NumDstRegs = 1;
289 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
290 new_inst.Instruction.NumSrcRegs = 2;
291 reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
292 reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
293 tctx->emit_instruction(tctx, &new_inst);
294
295 /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */
296 new_inst = tgsi_default_full_instruction();
297 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
298 new_inst.Instruction.NumDstRegs = 1;
299 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
300 new_inst.Instruction.NumSrcRegs = 3;
301 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
302 reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
303 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
304 new_inst.Src[2].Register.Negate = true;
305 tctx->emit_instruction(tctx, &new_inst);
306 }
307
308 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
309 /* MOV dst.w, imm{1.0} */
310 new_inst = tgsi_default_full_instruction();
311 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
312 new_inst.Instruction.NumDstRegs = 1;
313 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
314 new_inst.Instruction.NumSrcRegs = 1;
315 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
316 tctx->emit_instruction(tctx, &new_inst);
317 }
318 }
319
320 /* SCS - Sine Cosine
321 * dst.x = \cos{src.x}
322 * dst.y = \sin{src.x}
323 * dst.z = 0.0
324 * dst.w = 1.0
325 *
326 * ; needs: 1 tmp, imm{0.0, 1.0}
327 * if (dst.x aliases src.x) {
328 * MOV tmpA.x, src.x
329 * src = tmpA
330 * }
331 * COS dst.x, src.x
332 * SIN dst.y, src.x
333 * MOV dst.zw, imm{0.0, 1.0}
334 */
335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
336 #define SCS_TMP 1
337 static void
338 transform_scs(struct tgsi_transform_context *tctx,
339 struct tgsi_full_instruction *inst)
340 {
341 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
342 struct tgsi_full_dst_register *dst = &inst->Dst[0];
343 struct tgsi_full_src_register *src = &inst->Src[0];
344 struct tgsi_full_instruction new_inst;
345
346 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
347 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
348 src = &ctx->tmp[A].src;
349 }
350
351 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
352 /* COS dst.x, src.x */
353 new_inst = tgsi_default_full_instruction();
354 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
355 new_inst.Instruction.NumDstRegs = 1;
356 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
357 new_inst.Instruction.NumSrcRegs = 1;
358 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
359 tctx->emit_instruction(tctx, &new_inst);
360 }
361
362 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
363 /* SIN dst.y, src.x */
364 new_inst = tgsi_default_full_instruction();
365 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
366 new_inst.Instruction.NumDstRegs = 1;
367 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
368 new_inst.Instruction.NumSrcRegs = 1;
369 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
370 tctx->emit_instruction(tctx, &new_inst);
371 }
372
373 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
374 /* MOV dst.zw, imm{0.0, 1.0} */
375 new_inst = tgsi_default_full_instruction();
376 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
377 new_inst.Instruction.NumDstRegs = 1;
378 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
379 new_inst.Instruction.NumSrcRegs = 1;
380 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
381 tctx->emit_instruction(tctx, &new_inst);
382 }
383 }
384
385 /* LRP - Linear Interpolate
386 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
387 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
388 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
389 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
390 *
391 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
392 * can then become: src0 \times src1 - (src0 \times src2 - src2)
393 *
394 * ; needs: 1 tmp
395 * MAD tmpA, src0, src2, -src2
396 * MAD dst, src0, src1, -tmpA
397 */
398 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
399 #define LRP_TMP 1
400 static void
401 transform_lrp(struct tgsi_transform_context *tctx,
402 struct tgsi_full_instruction *inst)
403 {
404 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
405 struct tgsi_full_dst_register *dst = &inst->Dst[0];
406 struct tgsi_full_src_register *src0 = &inst->Src[0];
407 struct tgsi_full_src_register *src1 = &inst->Src[1];
408 struct tgsi_full_src_register *src2 = &inst->Src[2];
409 struct tgsi_full_instruction new_inst;
410
411 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
412 /* MAD tmpA, src0, src2, -src2 */
413 new_inst = tgsi_default_full_instruction();
414 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
415 new_inst.Instruction.NumDstRegs = 1;
416 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
417 new_inst.Instruction.NumSrcRegs = 3;
418 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
419 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
420 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
421 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
422 tctx->emit_instruction(tctx, &new_inst);
423
424 /* MAD dst, src0, src1, -tmpA */
425 new_inst = tgsi_default_full_instruction();
426 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
427 new_inst.Instruction.NumDstRegs = 1;
428 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
429 new_inst.Instruction.NumSrcRegs = 3;
430 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
431 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
432 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
433 new_inst.Src[2].Register.Negate = true;
434 tctx->emit_instruction(tctx, &new_inst);
435 }
436 }
437
438 /* FRC - Fraction
439 * dst.x = src.x - \lfloor src.x\rfloor
440 * dst.y = src.y - \lfloor src.y\rfloor
441 * dst.z = src.z - \lfloor src.z\rfloor
442 * dst.w = src.w - \lfloor src.w\rfloor
443 *
444 * ; needs: 1 tmp
445 * FLR tmpA, src
446 * SUB dst, src, tmpA
447 */
448 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
449 #define FRC_TMP 1
450 static void
451 transform_frc(struct tgsi_transform_context *tctx,
452 struct tgsi_full_instruction *inst)
453 {
454 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
455 struct tgsi_full_dst_register *dst = &inst->Dst[0];
456 struct tgsi_full_src_register *src = &inst->Src[0];
457 struct tgsi_full_instruction new_inst;
458
459 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
460 /* FLR tmpA, src */
461 new_inst = tgsi_default_full_instruction();
462 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
463 new_inst.Instruction.NumDstRegs = 1;
464 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
465 new_inst.Instruction.NumSrcRegs = 1;
466 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
467 tctx->emit_instruction(tctx, &new_inst);
468
469 /* SUB dst, src, tmpA */
470 new_inst = tgsi_default_full_instruction();
471 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
472 new_inst.Instruction.NumDstRegs = 1;
473 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
474 new_inst.Instruction.NumSrcRegs = 2;
475 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
476 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
477 new_inst.Src[1].Register.Negate = 1;
478 tctx->emit_instruction(tctx, &new_inst);
479 }
480 }
481
482 /* POW - Power
483 * dst.x = src0.x^{src1.x}
484 * dst.y = src0.x^{src1.x}
485 * dst.z = src0.x^{src1.x}
486 * dst.w = src0.x^{src1.x}
487 *
488 * ; needs: 1 tmp
489 * LG2 tmpA.x, src0.x
490 * MUL tmpA.x, src1.x, tmpA.x
491 * EX2 dst, tmpA.x
492 */
493 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
494 #define POW_TMP 1
495 static void
496 transform_pow(struct tgsi_transform_context *tctx,
497 struct tgsi_full_instruction *inst)
498 {
499 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
500 struct tgsi_full_dst_register *dst = &inst->Dst[0];
501 struct tgsi_full_src_register *src0 = &inst->Src[0];
502 struct tgsi_full_src_register *src1 = &inst->Src[1];
503 struct tgsi_full_instruction new_inst;
504
505 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
506 /* LG2 tmpA.x, src0.x */
507 new_inst = tgsi_default_full_instruction();
508 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
509 new_inst.Instruction.NumDstRegs = 1;
510 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
511 new_inst.Instruction.NumSrcRegs = 1;
512 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
513 tctx->emit_instruction(tctx, &new_inst);
514
515 /* MUL tmpA.x, src1.x, tmpA.x */
516 new_inst = tgsi_default_full_instruction();
517 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
518 new_inst.Instruction.NumDstRegs = 1;
519 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
520 new_inst.Instruction.NumSrcRegs = 2;
521 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
522 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
523 tctx->emit_instruction(tctx, &new_inst);
524
525 /* EX2 dst, tmpA.x */
526 new_inst = tgsi_default_full_instruction();
527 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
528 new_inst.Instruction.NumDstRegs = 1;
529 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
530 new_inst.Instruction.NumSrcRegs = 1;
531 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
532 tctx->emit_instruction(tctx, &new_inst);
533 }
534 }
535
536 /* LIT - Light Coefficients
537 * dst.x = 1.0
538 * dst.y = max(src.x, 0.0)
539 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
540 * dst.w = 1.0
541 *
542 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
543 * MAX tmpA.xy, src.xy, imm{0.0}
544 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
545 * LG2 tmpA.y, tmpA.y
546 * MUL tmpA.y, tmpA.z, tmpA.y
547 * EX2 tmpA.y, tmpA.y
548 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
549 * MOV dst.yz, tmpA.xy
550 * MOV dst.xw, imm{1.0}
551 */
552 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
553 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
554 #define LIT_TMP 1
555 static void
556 transform_lit(struct tgsi_transform_context *tctx,
557 struct tgsi_full_instruction *inst)
558 {
559 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
560 struct tgsi_full_dst_register *dst = &inst->Dst[0];
561 struct tgsi_full_src_register *src = &inst->Src[0];
562 struct tgsi_full_instruction new_inst;
563
564 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
565 /* MAX tmpA.xy, src.xy, imm{0.0} */
566 new_inst = tgsi_default_full_instruction();
567 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
568 new_inst.Instruction.NumDstRegs = 1;
569 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
570 new_inst.Instruction.NumSrcRegs = 2;
571 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
572 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
573 tctx->emit_instruction(tctx, &new_inst);
574
575 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
576 new_inst = tgsi_default_full_instruction();
577 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
578 new_inst.Instruction.NumDstRegs = 1;
579 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
580 new_inst.Instruction.NumSrcRegs = 3;
581 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
582 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
583 new_inst.Src[1].Register.Negate = true;
584 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
585 tctx->emit_instruction(tctx, &new_inst);
586
587 /* LG2 tmpA.y, tmpA.y */
588 new_inst = tgsi_default_full_instruction();
589 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
590 new_inst.Instruction.NumDstRegs = 1;
591 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
592 new_inst.Instruction.NumSrcRegs = 1;
593 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
594 tctx->emit_instruction(tctx, &new_inst);
595
596 /* MUL tmpA.y, tmpA.z, tmpA.y */
597 new_inst = tgsi_default_full_instruction();
598 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
599 new_inst.Instruction.NumDstRegs = 1;
600 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
601 new_inst.Instruction.NumSrcRegs = 2;
602 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
603 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
604 tctx->emit_instruction(tctx, &new_inst);
605
606 /* EX2 tmpA.y, tmpA.y */
607 new_inst = tgsi_default_full_instruction();
608 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
609 new_inst.Instruction.NumDstRegs = 1;
610 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
611 new_inst.Instruction.NumSrcRegs = 1;
612 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
613 tctx->emit_instruction(tctx, &new_inst);
614
615 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
616 new_inst = tgsi_default_full_instruction();
617 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
618 new_inst.Instruction.NumDstRegs = 1;
619 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
620 new_inst.Instruction.NumSrcRegs = 3;
621 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
622 new_inst.Src[0].Register.Negate = true;
623 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
624 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
625 tctx->emit_instruction(tctx, &new_inst);
626
627 /* MOV dst.yz, tmpA.xy */
628 new_inst = tgsi_default_full_instruction();
629 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
630 new_inst.Instruction.NumDstRegs = 1;
631 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
632 new_inst.Instruction.NumSrcRegs = 1;
633 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
634 tctx->emit_instruction(tctx, &new_inst);
635 }
636
637 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
638 /* MOV dst.xw, imm{1.0} */
639 new_inst = tgsi_default_full_instruction();
640 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
641 new_inst.Instruction.NumDstRegs = 1;
642 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
643 new_inst.Instruction.NumSrcRegs = 1;
644 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
645 tctx->emit_instruction(tctx, &new_inst);
646 }
647 }
648
649 /* EXP - Approximate Exponential Base 2
650 * dst.x = 2^{\lfloor src.x\rfloor}
651 * dst.y = src.x - \lfloor src.x\rfloor
652 * dst.z = 2^{src.x}
653 * dst.w = 1.0
654 *
655 * ; needs: 1 tmp, imm{1.0}
656 * if (lowering FLR) {
657 * FRC tmpA.x, src.x
658 * SUB tmpA.x, src.x, tmpA.x
659 * } else {
660 * FLR tmpA.x, src.x
661 * }
662 * EX2 tmpA.y, src.x
663 * SUB dst.y, src.x, tmpA.x
664 * EX2 dst.x, tmpA.x
665 * MOV dst.z, tmpA.y
666 * MOV dst.w, imm{1.0}
667 */
668 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
669 NINST(1)+ NINST(1) - OINST(1))
670 #define EXP_TMP 1
671 static void
672 transform_exp(struct tgsi_transform_context *tctx,
673 struct tgsi_full_instruction *inst)
674 {
675 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
676 struct tgsi_full_dst_register *dst = &inst->Dst[0];
677 struct tgsi_full_src_register *src = &inst->Src[0];
678 struct tgsi_full_instruction new_inst;
679
680 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
681 if (ctx->config->lower_FLR) {
682 /* FRC tmpA.x, src.x */
683 new_inst = tgsi_default_full_instruction();
684 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
685 new_inst.Instruction.NumDstRegs = 1;
686 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
687 new_inst.Instruction.NumSrcRegs = 1;
688 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
689 tctx->emit_instruction(tctx, &new_inst);
690
691 /* SUB tmpA.x, src.x, tmpA.x */
692 new_inst = tgsi_default_full_instruction();
693 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
694 new_inst.Instruction.NumDstRegs = 1;
695 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
696 new_inst.Instruction.NumSrcRegs = 2;
697 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
698 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
699 new_inst.Src[1].Register.Negate = 1;
700 tctx->emit_instruction(tctx, &new_inst);
701 } else {
702 /* FLR tmpA.x, src.x */
703 new_inst = tgsi_default_full_instruction();
704 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
705 new_inst.Instruction.NumDstRegs = 1;
706 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
707 new_inst.Instruction.NumSrcRegs = 1;
708 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
709 tctx->emit_instruction(tctx, &new_inst);
710 }
711 }
712
713 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
714 /* EX2 tmpA.y, src.x */
715 new_inst = tgsi_default_full_instruction();
716 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
717 new_inst.Instruction.NumDstRegs = 1;
718 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
719 new_inst.Instruction.NumSrcRegs = 1;
720 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
721 tctx->emit_instruction(tctx, &new_inst);
722 }
723
724 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
725 /* SUB dst.y, src.x, tmpA.x */
726 new_inst = tgsi_default_full_instruction();
727 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
728 new_inst.Instruction.NumDstRegs = 1;
729 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
730 new_inst.Instruction.NumSrcRegs = 2;
731 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
732 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
733 new_inst.Src[1].Register.Negate = 1;
734 tctx->emit_instruction(tctx, &new_inst);
735 }
736
737 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
738 /* EX2 dst.x, tmpA.x */
739 new_inst = tgsi_default_full_instruction();
740 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
741 new_inst.Instruction.NumDstRegs = 1;
742 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
743 new_inst.Instruction.NumSrcRegs = 1;
744 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
745 tctx->emit_instruction(tctx, &new_inst);
746 }
747
748 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
749 /* MOV dst.z, tmpA.y */
750 new_inst = tgsi_default_full_instruction();
751 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
752 new_inst.Instruction.NumDstRegs = 1;
753 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
754 new_inst.Instruction.NumSrcRegs = 1;
755 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
756 tctx->emit_instruction(tctx, &new_inst);
757 }
758
759 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
760 /* MOV dst.w, imm{1.0} */
761 new_inst = tgsi_default_full_instruction();
762 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
763 new_inst.Instruction.NumDstRegs = 1;
764 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
765 new_inst.Instruction.NumSrcRegs = 1;
766 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
767 tctx->emit_instruction(tctx, &new_inst);
768 }
769 }
770
771 /* LOG - Approximate Logarithm Base 2
772 * dst.x = \lfloor\log_2{|src.x|}\rfloor
773 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
774 * dst.z = \log_2{|src.x|}
775 * dst.w = 1.0
776 *
777 * ; needs: 1 tmp, imm{1.0}
778 * LG2 tmpA.x, |src.x|
779 * if (lowering FLR) {
780 * FRC tmpA.y, tmpA.x
781 * SUB tmpA.y, tmpA.x, tmpA.y
782 * } else {
783 * FLR tmpA.y, tmpA.x
784 * }
785 * EX2 tmpA.z, tmpA.y
786 * RCP tmpA.z, tmpA.z
787 * MUL dst.y, |src.x|, tmpA.z
788 * MOV dst.xz, tmpA.yx
789 * MOV dst.w, imm{1.0}
790 */
791 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
792 NINST(2) + NINST(1) + NINST(1) - OINST(1))
793 #define LOG_TMP 1
794 static void
795 transform_log(struct tgsi_transform_context *tctx,
796 struct tgsi_full_instruction *inst)
797 {
798 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
799 struct tgsi_full_dst_register *dst = &inst->Dst[0];
800 struct tgsi_full_src_register *src = &inst->Src[0];
801 struct tgsi_full_instruction new_inst;
802
803 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
804 /* LG2 tmpA.x, |src.x| */
805 new_inst = tgsi_default_full_instruction();
806 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
807 new_inst.Instruction.NumDstRegs = 1;
808 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
809 new_inst.Instruction.NumSrcRegs = 1;
810 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
811 new_inst.Src[0].Register.Absolute = true;
812 tctx->emit_instruction(tctx, &new_inst);
813 }
814
815 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
816 if (ctx->config->lower_FLR) {
817 /* FRC tmpA.y, tmpA.x */
818 new_inst = tgsi_default_full_instruction();
819 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
820 new_inst.Instruction.NumDstRegs = 1;
821 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
822 new_inst.Instruction.NumSrcRegs = 1;
823 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
824 tctx->emit_instruction(tctx, &new_inst);
825
826 /* SUB tmpA.y, tmpA.x, tmpA.y */
827 new_inst = tgsi_default_full_instruction();
828 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
829 new_inst.Instruction.NumDstRegs = 1;
830 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
831 new_inst.Instruction.NumSrcRegs = 2;
832 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
833 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
834 new_inst.Src[1].Register.Negate = 1;
835 tctx->emit_instruction(tctx, &new_inst);
836 } else {
837 /* FLR tmpA.y, tmpA.x */
838 new_inst = tgsi_default_full_instruction();
839 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
840 new_inst.Instruction.NumDstRegs = 1;
841 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
842 new_inst.Instruction.NumSrcRegs = 1;
843 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
844 tctx->emit_instruction(tctx, &new_inst);
845 }
846 }
847
848 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
849 /* EX2 tmpA.z, tmpA.y */
850 new_inst = tgsi_default_full_instruction();
851 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
852 new_inst.Instruction.NumDstRegs = 1;
853 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
854 new_inst.Instruction.NumSrcRegs = 1;
855 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
856 tctx->emit_instruction(tctx, &new_inst);
857
858 /* RCP tmpA.z, tmpA.z */
859 new_inst = tgsi_default_full_instruction();
860 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
861 new_inst.Instruction.NumDstRegs = 1;
862 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
863 new_inst.Instruction.NumSrcRegs = 1;
864 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
865 tctx->emit_instruction(tctx, &new_inst);
866
867 /* MUL dst.y, |src.x|, tmpA.z */
868 new_inst = tgsi_default_full_instruction();
869 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
870 new_inst.Instruction.NumDstRegs = 1;
871 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
872 new_inst.Instruction.NumSrcRegs = 2;
873 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
874 new_inst.Src[0].Register.Absolute = true;
875 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
876 tctx->emit_instruction(tctx, &new_inst);
877 }
878
879 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
880 /* MOV dst.xz, tmpA.yx */
881 new_inst = tgsi_default_full_instruction();
882 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
883 new_inst.Instruction.NumDstRegs = 1;
884 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
885 new_inst.Instruction.NumSrcRegs = 1;
886 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
887 tctx->emit_instruction(tctx, &new_inst);
888 }
889
890 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
891 /* MOV dst.w, imm{1.0} */
892 new_inst = tgsi_default_full_instruction();
893 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
894 new_inst.Instruction.NumDstRegs = 1;
895 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
896 new_inst.Instruction.NumSrcRegs = 1;
897 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
898 tctx->emit_instruction(tctx, &new_inst);
899 }
900 }
901
902 /* DP4 - 4-component Dot Product
903 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
904 *
905 * DP3 - 3-component Dot Product
906 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
907 *
908 * DPH - Homogeneous Dot Product
909 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
910 *
911 * DP2 - 2-component Dot Product
912 * dst = src0.x \times src1.x + src0.y \times src1.y
913 *
914 * DP2A - 2-component Dot Product And Add
915 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
916 *
917 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
918 * operations, which is what you'd prefer for a ISA that is natively
919 * scalar. Probably a native vector ISA would at least already have
920 * DP4/DP3 instructions, but perhaps there is room for an alternative
921 * translation for DPH/DP2/DP2A using vector instructions.
922 *
923 * ; needs: 1 tmp
924 * MUL tmpA.x, src0.x, src1.x
925 * MAD tmpA.x, src0.y, src1.y, tmpA.x
926 * if (DPH || DP3 || DP4) {
927 * MAD tmpA.x, src0.z, src1.z, tmpA.x
928 * if (DPH) {
929 * ADD tmpA.x, src1.w, tmpA.x
930 * } else if (DP4) {
931 * MAD tmpA.x, src0.w, src1.w, tmpA.x
932 * }
933 * } else if (DP2A) {
934 * ADD tmpA.x, src2.x, tmpA.x
935 * }
936 * ; fixup last instruction to replicate into dst
937 */
938 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
939 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
940 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
941 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
942 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
943 #define DOTP_TMP 1
944 static void
945 transform_dotp(struct tgsi_transform_context *tctx,
946 struct tgsi_full_instruction *inst)
947 {
948 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
949 struct tgsi_full_dst_register *dst = &inst->Dst[0];
950 struct tgsi_full_src_register *src0 = &inst->Src[0];
951 struct tgsi_full_src_register *src1 = &inst->Src[1];
952 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
953 struct tgsi_full_instruction new_inst;
954 unsigned opcode = inst->Instruction.Opcode;
955
956 /* NOTE: any potential last instruction must replicate src on all
957 * components (since it could be re-written to write to final dst)
958 */
959
960 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
961 /* MUL tmpA.x, src0.x, src1.x */
962 new_inst = tgsi_default_full_instruction();
963 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
964 new_inst.Instruction.NumDstRegs = 1;
965 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
966 new_inst.Instruction.NumSrcRegs = 2;
967 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
968 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
969 tctx->emit_instruction(tctx, &new_inst);
970
971 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
972 new_inst = tgsi_default_full_instruction();
973 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
974 new_inst.Instruction.NumDstRegs = 1;
975 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
976 new_inst.Instruction.NumSrcRegs = 3;
977 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
978 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
979 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
980
981 if ((opcode == TGSI_OPCODE_DPH) ||
982 (opcode == TGSI_OPCODE_DP3) ||
983 (opcode == TGSI_OPCODE_DP4)) {
984 tctx->emit_instruction(tctx, &new_inst);
985
986 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
987 new_inst = tgsi_default_full_instruction();
988 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
989 new_inst.Instruction.NumDstRegs = 1;
990 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
991 new_inst.Instruction.NumSrcRegs = 3;
992 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
993 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
994 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
995
996 if (opcode == TGSI_OPCODE_DPH) {
997 tctx->emit_instruction(tctx, &new_inst);
998
999 /* ADD tmpA.x, src1.w, tmpA.x */
1000 new_inst = tgsi_default_full_instruction();
1001 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1002 new_inst.Instruction.NumDstRegs = 1;
1003 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1004 new_inst.Instruction.NumSrcRegs = 2;
1005 reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
1006 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1007 } else if (opcode == TGSI_OPCODE_DP4) {
1008 tctx->emit_instruction(tctx, &new_inst);
1009
1010 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
1011 new_inst = tgsi_default_full_instruction();
1012 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1013 new_inst.Instruction.NumDstRegs = 1;
1014 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1015 new_inst.Instruction.NumSrcRegs = 3;
1016 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
1017 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
1018 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1019 }
1020 } else if (opcode == TGSI_OPCODE_DP2A) {
1021 tctx->emit_instruction(tctx, &new_inst);
1022
1023 /* ADD tmpA.x, src2.x, tmpA.x */
1024 new_inst = tgsi_default_full_instruction();
1025 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1026 new_inst.Instruction.NumDstRegs = 1;
1027 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1028 new_inst.Instruction.NumSrcRegs = 2;
1029 reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
1030 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1031 }
1032
1033 /* fixup last instruction to write to dst: */
1034 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1035
1036 tctx->emit_instruction(tctx, &new_inst);
1037 }
1038 }
1039
1040 /* FLR - floor, CEIL - ceil
1041 * ; needs: 1 tmp
1042 * if (CEIL) {
1043 * FRC tmpA, -src
1044 * ADD dst, src, tmpA
1045 * } else {
1046 * FRC tmpA, src
1047 * SUB dst, src, tmpA
1048 * }
1049 */
1050 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
1051 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
1052 #define FLR_TMP 1
1053 #define CEIL_TMP 1
1054 static void
1055 transform_flr_ceil(struct tgsi_transform_context *tctx,
1056 struct tgsi_full_instruction *inst)
1057 {
1058 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1059 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1060 struct tgsi_full_src_register *src0 = &inst->Src[0];
1061 struct tgsi_full_instruction new_inst;
1062 unsigned opcode = inst->Instruction.Opcode;
1063
1064 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1065 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
1066 new_inst = tgsi_default_full_instruction();
1067 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1068 new_inst.Instruction.NumDstRegs = 1;
1069 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1070 new_inst.Instruction.NumSrcRegs = 1;
1071 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1072
1073 if (opcode == TGSI_OPCODE_CEIL)
1074 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
1075 tctx->emit_instruction(tctx, &new_inst);
1076
1077 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
1078 new_inst = tgsi_default_full_instruction();
1079 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1080 new_inst.Instruction.NumDstRegs = 1;
1081 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1082 new_inst.Instruction.NumSrcRegs = 2;
1083 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1084 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1085 if (opcode == TGSI_OPCODE_FLR)
1086 new_inst.Src[1].Register.Negate = 1;
1087 tctx->emit_instruction(tctx, &new_inst);
1088 }
1089 }
1090
1091 /* TRUNC - truncate off fractional part
1092 * dst.x = trunc(src.x)
1093 * dst.y = trunc(src.y)
1094 * dst.z = trunc(src.z)
1095 * dst.w = trunc(src.w)
1096 *
1097 * ; needs: 1 tmp
1098 * if (lower FLR) {
1099 * FRC tmpA, |src|
1100 * SUB tmpA, |src|, tmpA
1101 * } else {
1102 * FLR tmpA, |src|
1103 * }
1104 * CMP dst, src, -tmpA, tmpA
1105 */
1106 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
1107 #define TRUNC_TMP 1
1108 static void
1109 transform_trunc(struct tgsi_transform_context *tctx,
1110 struct tgsi_full_instruction *inst)
1111 {
1112 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1113 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1114 struct tgsi_full_src_register *src0 = &inst->Src[0];
1115 struct tgsi_full_instruction new_inst;
1116
1117 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1118 if (ctx->config->lower_FLR) {
1119 new_inst = tgsi_default_full_instruction();
1120 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1121 new_inst.Instruction.NumDstRegs = 1;
1122 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1123 new_inst.Instruction.NumSrcRegs = 1;
1124 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1125 new_inst.Src[0].Register.Absolute = true;
1126 new_inst.Src[0].Register.Negate = false;
1127 tctx->emit_instruction(tctx, &new_inst);
1128
1129 new_inst = tgsi_default_full_instruction();
1130 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1131 new_inst.Instruction.NumDstRegs = 1;
1132 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1133 new_inst.Instruction.NumSrcRegs = 2;
1134 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1135 new_inst.Src[0].Register.Absolute = true;
1136 new_inst.Src[0].Register.Negate = false;
1137 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1138 new_inst.Src[1].Register.Negate = 1;
1139 tctx->emit_instruction(tctx, &new_inst);
1140 } else {
1141 new_inst = tgsi_default_full_instruction();
1142 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
1143 new_inst.Instruction.NumDstRegs = 1;
1144 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1145 new_inst.Instruction.NumSrcRegs = 1;
1146 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1147 new_inst.Src[0].Register.Absolute = true;
1148 new_inst.Src[0].Register.Negate = false;
1149 tctx->emit_instruction(tctx, &new_inst);
1150 }
1151
1152 new_inst = tgsi_default_full_instruction();
1153 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1154 new_inst.Instruction.NumDstRegs = 1;
1155 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1156 new_inst.Instruction.NumSrcRegs = 3;
1157 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1158 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1159 new_inst.Src[1].Register.Negate = true;
1160 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1161 tctx->emit_instruction(tctx, &new_inst);
1162 }
1163 }
1164
1165 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1166 * in the case of TXP, the clamping must happen *after* projection, so
1167 * we need to lower TXP to TEX.
1168 *
1169 * MOV tmpA, src0
1170 * if (opc == TXP) {
1171 * ; do perspective division manually before clamping:
1172 * RCP tmpB, tmpA.w
1173 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1174 * opc = TEX;
1175 * }
1176 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1177 * <opc> dst, tmpA, ...
1178 */
1179 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1180 #define SAMP_TMP 2
1181 static int
1182 transform_samp(struct tgsi_transform_context *tctx,
1183 struct tgsi_full_instruction *inst)
1184 {
1185 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1186 struct tgsi_full_src_register *coord = &inst->Src[0];
1187 struct tgsi_full_src_register *samp;
1188 struct tgsi_full_instruction new_inst;
1189 /* mask is clamped coords, pmask is all coords (for projection): */
1190 unsigned mask = 0, pmask = 0, smask;
1191 unsigned tex = inst->Texture.Texture;
1192 unsigned opcode = inst->Instruction.Opcode;
1193 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1194 (ctx->config->lower_TXP & (1 << tex));
1195
1196 if (opcode == TGSI_OPCODE_TXB2) {
1197 samp = &inst->Src[2];
1198 } else {
1199 samp = &inst->Src[1];
1200 }
1201
1202 /* convert sampler # to bitmask to test: */
1203 smask = 1 << samp->Register.Index;
1204
1205 /* check if we actually need to lower this one: */
1206 if (!(ctx->saturate & smask) && !lower_txp)
1207 return -1;
1208
1209 /* figure out which coordinates need saturating:
1210 * - RECT textures should not get saturated
1211 * - array index coords should not get saturated
1212 */
1213 switch (tex) {
1214 case TGSI_TEXTURE_3D:
1215 case TGSI_TEXTURE_CUBE:
1216 case TGSI_TEXTURE_CUBE_ARRAY:
1217 case TGSI_TEXTURE_SHADOWCUBE:
1218 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1219 if (ctx->config->saturate_r & smask)
1220 mask |= TGSI_WRITEMASK_Z;
1221 pmask |= TGSI_WRITEMASK_Z;
1222 /* fallthrough */
1223
1224 case TGSI_TEXTURE_2D:
1225 case TGSI_TEXTURE_2D_ARRAY:
1226 case TGSI_TEXTURE_SHADOW2D:
1227 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1228 case TGSI_TEXTURE_2D_MSAA:
1229 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1230 if (ctx->config->saturate_t & smask)
1231 mask |= TGSI_WRITEMASK_Y;
1232 pmask |= TGSI_WRITEMASK_Y;
1233 /* fallthrough */
1234
1235 case TGSI_TEXTURE_1D:
1236 case TGSI_TEXTURE_1D_ARRAY:
1237 case TGSI_TEXTURE_SHADOW1D:
1238 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1239 if (ctx->config->saturate_s & smask)
1240 mask |= TGSI_WRITEMASK_X;
1241 pmask |= TGSI_WRITEMASK_X;
1242 break;
1243
1244 case TGSI_TEXTURE_RECT:
1245 case TGSI_TEXTURE_SHADOWRECT:
1246 /* we don't saturate, but in case of lower_txp we
1247 * still need to do the perspective divide:
1248 */
1249 pmask = TGSI_WRITEMASK_XY;
1250 break;
1251 }
1252
1253 /* sanity check.. driver could be asking to saturate a non-
1254 * existent coordinate component:
1255 */
1256 if (!mask && !lower_txp)
1257 return -1;
1258
1259 /* MOV tmpA, src0 */
1260 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1261
1262 /* This is a bit sad.. we need to clamp *after* the coords
1263 * are projected, which means lowering TXP to TEX and doing
1264 * the projection ourself. But since I haven't figured out
1265 * how to make the lowering code deliver an electric shock
1266 * to anyone using GL_CLAMP, we must do this instead:
1267 */
1268 if (opcode == TGSI_OPCODE_TXP) {
1269 /* RCP tmpB.x tmpA.w */
1270 new_inst = tgsi_default_full_instruction();
1271 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1272 new_inst.Instruction.NumDstRegs = 1;
1273 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1274 new_inst.Instruction.NumSrcRegs = 1;
1275 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1276 tctx->emit_instruction(tctx, &new_inst);
1277
1278 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1279 new_inst = tgsi_default_full_instruction();
1280 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1281 new_inst.Instruction.NumDstRegs = 1;
1282 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1283 new_inst.Instruction.NumSrcRegs = 2;
1284 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1285 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1286 tctx->emit_instruction(tctx, &new_inst);
1287
1288 opcode = TGSI_OPCODE_TEX;
1289 }
1290
1291 /* MOV_SAT tmpA.<mask>, tmpA */
1292 if (mask) {
1293 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1294 }
1295
1296 /* modify the texture samp instruction to take fixed up coord: */
1297 new_inst = *inst;
1298 new_inst.Instruction.Opcode = opcode;
1299 new_inst.Src[0] = ctx->tmp[A].src;
1300 tctx->emit_instruction(tctx, &new_inst);
1301
1302 return 0;
1303 }
1304
1305 /* Two-sided color emulation:
1306 * For each COLOR input, create a corresponding BCOLOR input, plus
1307 * CMP instruction to select front or back color based on FACE
1308 */
1309 #define TWOSIDE_GROW(n) ( \
1310 2 + /* FACE */ \
1311 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1312 ((n) * 1) + /* TEMP[] */ \
1313 ((n) * NINST(3)) /* CMP instr */ \
1314 )
1315
1316 static void
1317 emit_twoside(struct tgsi_transform_context *tctx)
1318 {
1319 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1320 struct tgsi_shader_info *info = ctx->info;
1321 struct tgsi_full_declaration decl;
1322 struct tgsi_full_instruction new_inst;
1323 unsigned inbase, tmpbase;
1324 int i;
1325
1326 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1327 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1328
1329 /* additional inputs for BCOLOR's */
1330 for (i = 0; i < ctx->two_side_colors; i++) {
1331 unsigned in_idx = ctx->two_side_idx[i];
1332 decl = tgsi_default_full_declaration();
1333 decl.Declaration.File = TGSI_FILE_INPUT;
1334 decl.Declaration.Semantic = true;
1335 decl.Range.First = decl.Range.Last = inbase + i;
1336 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1337 decl.Semantic.Index = info->input_semantic_index[in_idx];
1338 decl.Declaration.Interpolate = true;
1339 decl.Interp.Interpolate = info->input_interpolate[in_idx];
1340 decl.Interp.Location = info->input_interpolate_loc[in_idx];
1341 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1342 tctx->emit_declaration(tctx, &decl);
1343 }
1344
1345 /* additional input for FACE */
1346 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1347 decl = tgsi_default_full_declaration();
1348 decl.Declaration.File = TGSI_FILE_INPUT;
1349 decl.Declaration.Semantic = true;
1350 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1351 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1352 decl.Semantic.Index = 0;
1353 tctx->emit_declaration(tctx, &decl);
1354
1355 ctx->face_idx = decl.Range.First;
1356 }
1357
1358 /* additional temps for COLOR/BCOLOR selection: */
1359 for (i = 0; i < ctx->two_side_colors; i++) {
1360 decl = tgsi_default_full_declaration();
1361 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1362 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1363 tctx->emit_declaration(tctx, &decl);
1364 }
1365
1366 /* and finally additional instructions to select COLOR/BCOLOR: */
1367 for (i = 0; i < ctx->two_side_colors; i++) {
1368 new_inst = tgsi_default_full_instruction();
1369 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1370
1371 new_inst.Instruction.NumDstRegs = 1;
1372 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1373 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1374 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1375
1376 new_inst.Instruction.NumSrcRegs = 3;
1377 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1378 new_inst.Src[0].Register.Index = ctx->face_idx;
1379 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1380 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1381 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1382 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1383 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1384 new_inst.Src[1].Register.Index = inbase + i;
1385 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1386 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1387 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1388 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1389 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1390 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1391 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1392 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1393 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1394 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1395
1396 tctx->emit_instruction(tctx, &new_inst);
1397 }
1398 }
1399
1400 static void
1401 emit_decls(struct tgsi_transform_context *tctx)
1402 {
1403 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1404 struct tgsi_shader_info *info = ctx->info;
1405 struct tgsi_full_declaration decl;
1406 struct tgsi_full_immediate immed;
1407 unsigned tmpbase;
1408 int i;
1409
1410 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1411
1412 ctx->color_base = tmpbase + ctx->numtmp;
1413
1414 /* declare immediate: */
1415 immed = tgsi_default_full_immediate();
1416 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1417 immed.u[0].Float = 0.0;
1418 immed.u[1].Float = 1.0;
1419 immed.u[2].Float = 128.0;
1420 immed.u[3].Float = 0.0;
1421 tctx->emit_immediate(tctx, &immed);
1422
1423 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1424 ctx->imm.Register.Index = info->immediate_count;
1425 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1426 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1427 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1428 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1429
1430 /* declare temp regs: */
1431 for (i = 0; i < ctx->numtmp; i++) {
1432 decl = tgsi_default_full_declaration();
1433 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1434 decl.Range.First = decl.Range.Last = tmpbase + i;
1435 tctx->emit_declaration(tctx, &decl);
1436
1437 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1438 ctx->tmp[i].src.Register.Index = tmpbase + i;
1439 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1440 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1441 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1442 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1443
1444 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1445 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1446 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1447 }
1448
1449 if (ctx->two_side_colors)
1450 emit_twoside(tctx);
1451 }
1452
1453 static void
1454 rename_color_inputs(struct tgsi_lowering_context *ctx,
1455 struct tgsi_full_instruction *inst)
1456 {
1457 unsigned i, j;
1458 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1459 struct tgsi_src_register *src = &inst->Src[i].Register;
1460 if (src->File == TGSI_FILE_INPUT) {
1461 for (j = 0; j < ctx->two_side_colors; j++) {
1462 if (src->Index == ctx->two_side_idx[j]) {
1463 src->File = TGSI_FILE_TEMPORARY;
1464 src->Index = ctx->color_base + j;
1465 break;
1466 }
1467 }
1468 }
1469 }
1470
1471 }
1472
1473 static void
1474 transform_instr(struct tgsi_transform_context *tctx,
1475 struct tgsi_full_instruction *inst)
1476 {
1477 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1478
1479 if (!ctx->emitted_decls) {
1480 emit_decls(tctx);
1481 ctx->emitted_decls = 1;
1482 }
1483
1484 /* if emulating two-sided-color, we need to re-write some
1485 * src registers:
1486 */
1487 if (ctx->two_side_colors)
1488 rename_color_inputs(ctx, inst);
1489
1490 switch (inst->Instruction.Opcode) {
1491 case TGSI_OPCODE_DST:
1492 if (!ctx->config->lower_DST)
1493 goto skip;
1494 transform_dst(tctx, inst);
1495 break;
1496 case TGSI_OPCODE_XPD:
1497 if (!ctx->config->lower_XPD)
1498 goto skip;
1499 transform_xpd(tctx, inst);
1500 break;
1501 case TGSI_OPCODE_SCS:
1502 if (!ctx->config->lower_SCS)
1503 goto skip;
1504 transform_scs(tctx, inst);
1505 break;
1506 case TGSI_OPCODE_LRP:
1507 if (!ctx->config->lower_LRP)
1508 goto skip;
1509 transform_lrp(tctx, inst);
1510 break;
1511 case TGSI_OPCODE_FRC:
1512 if (!ctx->config->lower_FRC)
1513 goto skip;
1514 transform_frc(tctx, inst);
1515 break;
1516 case TGSI_OPCODE_POW:
1517 if (!ctx->config->lower_POW)
1518 goto skip;
1519 transform_pow(tctx, inst);
1520 break;
1521 case TGSI_OPCODE_LIT:
1522 if (!ctx->config->lower_LIT)
1523 goto skip;
1524 transform_lit(tctx, inst);
1525 break;
1526 case TGSI_OPCODE_EXP:
1527 if (!ctx->config->lower_EXP)
1528 goto skip;
1529 transform_exp(tctx, inst);
1530 break;
1531 case TGSI_OPCODE_LOG:
1532 if (!ctx->config->lower_LOG)
1533 goto skip;
1534 transform_log(tctx, inst);
1535 break;
1536 case TGSI_OPCODE_DP4:
1537 if (!ctx->config->lower_DP4)
1538 goto skip;
1539 transform_dotp(tctx, inst);
1540 break;
1541 case TGSI_OPCODE_DP3:
1542 if (!ctx->config->lower_DP3)
1543 goto skip;
1544 transform_dotp(tctx, inst);
1545 break;
1546 case TGSI_OPCODE_DPH:
1547 if (!ctx->config->lower_DPH)
1548 goto skip;
1549 transform_dotp(tctx, inst);
1550 break;
1551 case TGSI_OPCODE_DP2:
1552 if (!ctx->config->lower_DP2)
1553 goto skip;
1554 transform_dotp(tctx, inst);
1555 break;
1556 case TGSI_OPCODE_DP2A:
1557 if (!ctx->config->lower_DP2A)
1558 goto skip;
1559 transform_dotp(tctx, inst);
1560 break;
1561 case TGSI_OPCODE_FLR:
1562 if (!ctx->config->lower_FLR)
1563 goto skip;
1564 transform_flr_ceil(tctx, inst);
1565 break;
1566 case TGSI_OPCODE_CEIL:
1567 if (!ctx->config->lower_CEIL)
1568 goto skip;
1569 transform_flr_ceil(tctx, inst);
1570 break;
1571 case TGSI_OPCODE_TRUNC:
1572 if (!ctx->config->lower_TRUNC)
1573 goto skip;
1574 transform_trunc(tctx, inst);
1575 break;
1576 case TGSI_OPCODE_TEX:
1577 case TGSI_OPCODE_TXP:
1578 case TGSI_OPCODE_TXB:
1579 case TGSI_OPCODE_TXB2:
1580 case TGSI_OPCODE_TXL:
1581 if (transform_samp(tctx, inst))
1582 goto skip;
1583 break;
1584 default:
1585 skip:
1586 tctx->emit_instruction(tctx, inst);
1587 break;
1588 }
1589 }
1590
1591 /* returns NULL if no lowering required, else returns the new
1592 * tokens (which caller is required to free()). In either case
1593 * returns the current info.
1594 */
1595 const struct tgsi_token *
1596 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1597 const struct tgsi_token *tokens,
1598 struct tgsi_shader_info *info)
1599 {
1600 struct tgsi_lowering_context ctx;
1601 struct tgsi_token *newtoks;
1602 int newlen, numtmp;
1603
1604 /* sanity check in case limit is ever increased: */
1605 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1606
1607 /* sanity check the lowering */
1608 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1609 assert(!(config->lower_FRC && config->lower_TRUNC));
1610
1611 memset(&ctx, 0, sizeof(ctx));
1612 ctx.base.transform_instruction = transform_instr;
1613 ctx.info = info;
1614 ctx.config = config;
1615
1616 tgsi_scan_shader(tokens, info);
1617
1618 /* if we are adding fragment shader support to emulate two-sided
1619 * color, then figure out the number of additional inputs we need
1620 * to create for BCOLOR's..
1621 */
1622 if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1623 config->color_two_side) {
1624 int i;
1625 ctx.face_idx = -1;
1626 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1627 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1628 ctx.two_side_idx[ctx.two_side_colors++] = i;
1629 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1630 ctx.face_idx = i;
1631 }
1632 }
1633
1634 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1635
1636 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1637 /* if there are no instructions to lower, then we are done: */
1638 if (!(OPCS(DST) ||
1639 OPCS(XPD) ||
1640 OPCS(SCS) ||
1641 OPCS(LRP) ||
1642 OPCS(FRC) ||
1643 OPCS(POW) ||
1644 OPCS(LIT) ||
1645 OPCS(EXP) ||
1646 OPCS(LOG) ||
1647 OPCS(DP4) ||
1648 OPCS(DP3) ||
1649 OPCS(DPH) ||
1650 OPCS(DP2) ||
1651 OPCS(DP2A) ||
1652 OPCS(FLR) ||
1653 OPCS(CEIL) ||
1654 OPCS(TRUNC) ||
1655 OPCS(TXP) ||
1656 ctx.two_side_colors ||
1657 ctx.saturate))
1658 return NULL;
1659
1660 #if 0 /* debug */
1661 _debug_printf("BEFORE:");
1662 tgsi_dump(tokens, 0);
1663 #endif
1664
1665 numtmp = 0;
1666 newlen = tgsi_num_tokens(tokens);
1667 if (OPCS(DST)) {
1668 newlen += DST_GROW * OPCS(DST);
1669 numtmp = MAX2(numtmp, DST_TMP);
1670 }
1671 if (OPCS(XPD)) {
1672 newlen += XPD_GROW * OPCS(XPD);
1673 numtmp = MAX2(numtmp, XPD_TMP);
1674 }
1675 if (OPCS(SCS)) {
1676 newlen += SCS_GROW * OPCS(SCS);
1677 numtmp = MAX2(numtmp, SCS_TMP);
1678 }
1679 if (OPCS(LRP)) {
1680 newlen += LRP_GROW * OPCS(LRP);
1681 numtmp = MAX2(numtmp, LRP_TMP);
1682 }
1683 if (OPCS(FRC)) {
1684 newlen += FRC_GROW * OPCS(FRC);
1685 numtmp = MAX2(numtmp, FRC_TMP);
1686 }
1687 if (OPCS(POW)) {
1688 newlen += POW_GROW * OPCS(POW);
1689 numtmp = MAX2(numtmp, POW_TMP);
1690 }
1691 if (OPCS(LIT)) {
1692 newlen += LIT_GROW * OPCS(LIT);
1693 numtmp = MAX2(numtmp, LIT_TMP);
1694 }
1695 if (OPCS(EXP)) {
1696 newlen += EXP_GROW * OPCS(EXP);
1697 numtmp = MAX2(numtmp, EXP_TMP);
1698 }
1699 if (OPCS(LOG)) {
1700 newlen += LOG_GROW * OPCS(LOG);
1701 numtmp = MAX2(numtmp, LOG_TMP);
1702 }
1703 if (OPCS(DP4)) {
1704 newlen += DP4_GROW * OPCS(DP4);
1705 numtmp = MAX2(numtmp, DOTP_TMP);
1706 }
1707 if (OPCS(DP3)) {
1708 newlen += DP3_GROW * OPCS(DP3);
1709 numtmp = MAX2(numtmp, DOTP_TMP);
1710 }
1711 if (OPCS(DPH)) {
1712 newlen += DPH_GROW * OPCS(DPH);
1713 numtmp = MAX2(numtmp, DOTP_TMP);
1714 }
1715 if (OPCS(DP2)) {
1716 newlen += DP2_GROW * OPCS(DP2);
1717 numtmp = MAX2(numtmp, DOTP_TMP);
1718 }
1719 if (OPCS(DP2A)) {
1720 newlen += DP2A_GROW * OPCS(DP2A);
1721 numtmp = MAX2(numtmp, DOTP_TMP);
1722 }
1723 if (OPCS(FLR)) {
1724 newlen += FLR_GROW * OPCS(FLR);
1725 numtmp = MAX2(numtmp, FLR_TMP);
1726 }
1727 if (OPCS(CEIL)) {
1728 newlen += CEIL_GROW * OPCS(CEIL);
1729 numtmp = MAX2(numtmp, CEIL_TMP);
1730 }
1731 if (OPCS(TRUNC)) {
1732 newlen += TRUNC_GROW * OPCS(TRUNC);
1733 numtmp = MAX2(numtmp, TRUNC_TMP);
1734 }
1735 if (ctx.saturate || config->lower_TXP) {
1736 int n = 0;
1737
1738 if (ctx.saturate) {
1739 n = info->opcode_count[TGSI_OPCODE_TEX] +
1740 info->opcode_count[TGSI_OPCODE_TXP] +
1741 info->opcode_count[TGSI_OPCODE_TXB] +
1742 info->opcode_count[TGSI_OPCODE_TXB2] +
1743 info->opcode_count[TGSI_OPCODE_TXL];
1744 } else if (config->lower_TXP) {
1745 n = info->opcode_count[TGSI_OPCODE_TXP];
1746 }
1747
1748 newlen += SAMP_GROW * n;
1749 numtmp = MAX2(numtmp, SAMP_TMP);
1750 }
1751
1752 /* specifically don't include two_side_colors temps in the count: */
1753 ctx.numtmp = numtmp;
1754
1755 if (ctx.two_side_colors) {
1756 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1757 /* note: we permanently consume temp regs, re-writing references
1758 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1759 * instruction that selects which varying to use):
1760 */
1761 numtmp += ctx.two_side_colors;
1762 }
1763
1764 newlen += 2 * numtmp;
1765 newlen += 5; /* immediate */
1766
1767 newtoks = tgsi_alloc_tokens(newlen);
1768 if (!newtoks)
1769 return NULL;
1770
1771 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1772
1773 tgsi_scan_shader(newtoks, info);
1774
1775 #if 0 /* debug */
1776 _debug_printf("AFTER:");
1777 tgsi_dump(newtoks, 0);
1778 #endif
1779
1780 return newtoks;
1781 }