tgsi/lowering: add support for lowering TRUNC
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33
34 #include "tgsi_lowering.h"
35
/*
 * State carried through one run of the lowering pass.
 *
 * 'base' has to remain the first member: tgsi_lowering_context() turns a
 * 'struct tgsi_transform_context *' back into this type with a plain cast.
 */
struct tgsi_lowering_context {
   struct tgsi_transform_context base;
   const struct tgsi_lowering_config *config;   /* which lowerings to apply (e.g. lower_FLR) */
   struct tgsi_shader_info *info;
   unsigned two_side_colors;
   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
   unsigned color_base;         /* base register for chosen COLOR/BCOLOR's */
   int face_idx;
   unsigned numtmp;
   /* scratch temporaries for the lowerings, kept in both src and dst form */
   struct {
      struct tgsi_full_src_register src;
      struct tgsi_full_dst_register dst;
   } tmp[2];
/* symbolic indices into tmp[] (tmpA / tmpB in the lowering comments) */
#define A 0
#define B 1
   /* immediate read by the lowerings: .x is used as 0.0, .y as 1.0, .z as 128.0 */
   struct tgsi_full_src_register imm;
   int emitted_decls;
   unsigned saturate;
};
55
/* Downcast the generic transform context to the lowering context that
 * embeds it as its first member.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
61
62 /*
63 * Utility helpers:
64 */
65
66 static void
67 reg_dst(struct tgsi_full_dst_register *dst,
68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70 *dst = *orig_dst;
71 dst->Register.WriteMask &= wrmask;
72 assert(dst->Register.WriteMask);
73 }
74
75 static inline void
76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78 swiz[0] = src->SwizzleX;
79 swiz[1] = src->SwizzleY;
80 swiz[2] = src->SwizzleZ;
81 swiz[3] = src->SwizzleW;
82 }
83
84 static void
85 reg_src(struct tgsi_full_src_register *src,
86 const struct tgsi_full_src_register *orig_src,
87 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89 unsigned swiz[4];
90 get_swiz(swiz, &orig_src->Register);
91 *src = *orig_src;
92 src->Register.SwizzleX = swiz[sx];
93 src->Register.SwizzleY = swiz[sy];
94 src->Register.SwizzleZ = swiz[sz];
95 src->Register.SwizzleW = swiz[sw];
96 }
97
/* SWIZ(x,y,z,w) expands to the four TGSI_SWIZZLE_* arguments expected by
 * reg_src().  Use '_' for a channel whose value does not matter; it is
 * mapped onto X so that the expansion is still a valid swizzle.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101
102 /*
103 * if (dst.x aliases src.x) {
104 * MOV tmpA.x, src.x
105 * src = tmpA
106 * }
107 * COS dst.x, src.x
108 * SIN dst.y, src.x
109 * MOV dst.zw, imm{0.0, 1.0}
110 */
111 static bool
112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115 if ((dst->Register.File == src->Register.File) &&
116 (dst->Register.Index == src->Register.Index)) {
117 unsigned i, actual_mask = 0;
118 unsigned swiz[4];
119 get_swiz(swiz, &src->Register);
120 for (i = 0; i < 4; i++)
121 if (src_mask & (1 << i))
122 actual_mask |= (1 << swiz[i]);
123 if (actual_mask & dst_mask)
124 return true;
125 }
126 return false;
127 }
128
129 static void
130 create_mov(struct tgsi_transform_context *tctx,
131 const struct tgsi_full_dst_register *dst,
132 const struct tgsi_full_src_register *src,
133 unsigned mask, unsigned saturate)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.Saturate = saturate;
140 new_inst.Instruction.NumDstRegs = 1;
141 reg_dst(&new_inst.Dst[0], dst, mask);
142 new_inst.Instruction.NumSrcRegs = 1;
143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144 tctx->emit_instruction(tctx, &new_inst);
145 }
146
/* Token accounting used to size the output of a lowering.  The estimate is
 * deliberately pessimistic: an instruction that gets removed is assumed to
 * be as small as possible (no ADDR[] or anything else that adds tokens to
 * a src/dst), while every inserted instruction is assumed to need the
 * larger form.
 *
 *   OINST() - old (removed) instruction
 *     1 token for the instruction itself
 *     1 token for dst
 *     1 token per src
 *
 *   NINST() - new (inserted) instruction
 *     1 token for the instruction itself
 *     2 tokens for dst
 *     2 tokens per src
 */

#define LOWERING_INST_TOKENS(per_reg, nargs) (1 + (per_reg) * (1 + (nargs)))
#define OINST(nargs) LOWERING_INST_TOKENS(1, nargs)
#define NINST(nargs) LOWERING_INST_TOKENS(2, nargs)
165
166 /*
167 * Lowering Translators:
168 */
169
170 /* DST - Distance Vector
171 * dst.x = 1.0
172 * dst.y = src0.y \times src1.y
173 * dst.z = src0.z
174 * dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
181 * src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
185 * src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP 2
195 static void
196 transform_dst(struct tgsi_transform_context *tctx,
197 struct tgsi_full_instruction *inst)
198 {
199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200 struct tgsi_full_dst_register *dst = &inst->Dst[0];
201 struct tgsi_full_src_register *src0 = &inst->Src[0];
202 struct tgsi_full_src_register *src1 = &inst->Src[1];
203 struct tgsi_full_instruction new_inst;
204
205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207 src0 = &ctx->tmp[A].src;
208 }
209
210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212 src1 = &ctx->tmp[B].src;
213 }
214
215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst = tgsi_default_full_instruction();
218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219 new_inst.Instruction.NumDstRegs = 1;
220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221 new_inst.Instruction.NumSrcRegs = 2;
222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224 tctx->emit_instruction(tctx, &new_inst);
225 }
226
227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228 /* MOV dst.z, src0.z */
229 new_inst = tgsi_default_full_instruction();
230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231 new_inst.Instruction.NumDstRegs = 1;
232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233 new_inst.Instruction.NumSrcRegs = 1;
234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235 tctx->emit_instruction(tctx, &new_inst);
236 }
237
238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239 /* MOV dst.w, src1.w */
240 new_inst = tgsi_default_full_instruction();
241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242 new_inst.Instruction.NumDstRegs = 1;
243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244 new_inst.Instruction.NumSrcRegs = 1;
245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246 tctx->emit_instruction(tctx, &new_inst);
247 }
248
249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst = tgsi_default_full_instruction();
252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253 new_inst.Instruction.NumDstRegs = 1;
254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255 new_inst.Instruction.NumSrcRegs = 1;
256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257 tctx->emit_instruction(tctx, &new_inst);
258 }
259 }
260
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
265 * dst.w = 1.0
266 *
267 * ; needs: 2 tmp, imm{1.0}
268 * MUL tmpA.xyz, src0.yzx, src1.zxy
269 * MUL tmpB.xyz, src1.yzx, src0.zxy
270 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
271 * MOV dst.w, imm{1.0}
272 */
273 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
274 #define XPD_TMP 2
275 static void
276 transform_xpd(struct tgsi_transform_context *tctx,
277 struct tgsi_full_instruction *inst)
278 {
279 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
280 struct tgsi_full_dst_register *dst = &inst->Dst[0];
281 struct tgsi_full_src_register *src0 = &inst->Src[0];
282 struct tgsi_full_src_register *src1 = &inst->Src[1];
283 struct tgsi_full_instruction new_inst;
284
285 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
286 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
287 new_inst = tgsi_default_full_instruction();
288 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
289 new_inst.Instruction.NumDstRegs = 1;
290 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
291 new_inst.Instruction.NumSrcRegs = 2;
292 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
293 reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
294 tctx->emit_instruction(tctx, &new_inst);
295
296 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
297 new_inst = tgsi_default_full_instruction();
298 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
299 new_inst.Instruction.NumDstRegs = 1;
300 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
301 new_inst.Instruction.NumSrcRegs = 2;
302 reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
303 reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
304 tctx->emit_instruction(tctx, &new_inst);
305
306 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
307 new_inst = tgsi_default_full_instruction();
308 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
309 new_inst.Instruction.NumDstRegs = 1;
310 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
311 new_inst.Instruction.NumSrcRegs = 2;
312 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
313 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _));
314 tctx->emit_instruction(tctx, &new_inst);
315 }
316
317 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
318 /* MOV dst.w, imm{1.0} */
319 new_inst = tgsi_default_full_instruction();
320 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
321 new_inst.Instruction.NumDstRegs = 1;
322 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
323 new_inst.Instruction.NumSrcRegs = 1;
324 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
325 tctx->emit_instruction(tctx, &new_inst);
326 }
327 }
328
329 /* SCS - Sine Cosine
330 * dst.x = \cos{src.x}
331 * dst.y = \sin{src.x}
332 * dst.z = 0.0
333 * dst.w = 1.0
334 *
335 * ; needs: 1 tmp, imm{0.0, 1.0}
336 * if (dst.x aliases src.x) {
337 * MOV tmpA.x, src.x
338 * src = tmpA
339 * }
340 * COS dst.x, src.x
341 * SIN dst.y, src.x
342 * MOV dst.zw, imm{0.0, 1.0}
343 */
344 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
345 #define SCS_TMP 1
346 static void
347 transform_scs(struct tgsi_transform_context *tctx,
348 struct tgsi_full_instruction *inst)
349 {
350 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
351 struct tgsi_full_dst_register *dst = &inst->Dst[0];
352 struct tgsi_full_src_register *src = &inst->Src[0];
353 struct tgsi_full_instruction new_inst;
354
355 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
356 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
357 src = &ctx->tmp[A].src;
358 }
359
360 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
361 /* COS dst.x, src.x */
362 new_inst = tgsi_default_full_instruction();
363 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
364 new_inst.Instruction.NumDstRegs = 1;
365 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
366 new_inst.Instruction.NumSrcRegs = 1;
367 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
368 tctx->emit_instruction(tctx, &new_inst);
369 }
370
371 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
372 /* SIN dst.y, src.x */
373 new_inst = tgsi_default_full_instruction();
374 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
375 new_inst.Instruction.NumDstRegs = 1;
376 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
377 new_inst.Instruction.NumSrcRegs = 1;
378 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
379 tctx->emit_instruction(tctx, &new_inst);
380 }
381
382 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
383 /* MOV dst.zw, imm{0.0, 1.0} */
384 new_inst = tgsi_default_full_instruction();
385 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
386 new_inst.Instruction.NumDstRegs = 1;
387 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
388 new_inst.Instruction.NumSrcRegs = 1;
389 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
390 tctx->emit_instruction(tctx, &new_inst);
391 }
392 }
393
394 /* LRP - Linear Interpolate
395 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
396 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
397 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
398 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
399 *
400 * ; needs: 2 tmp, imm{1.0}
401 * MUL tmpA, src0, src1
402 * SUB tmpB, imm{1.0}, src0
403 * MUL tmpB, tmpB, src2
404 * ADD dst, tmpA, tmpB
405 */
406 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
407 #define LRP_TMP 2
408 static void
409 transform_lrp(struct tgsi_transform_context *tctx,
410 struct tgsi_full_instruction *inst)
411 {
412 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
413 struct tgsi_full_dst_register *dst = &inst->Dst[0];
414 struct tgsi_full_src_register *src0 = &inst->Src[0];
415 struct tgsi_full_src_register *src1 = &inst->Src[1];
416 struct tgsi_full_src_register *src2 = &inst->Src[2];
417 struct tgsi_full_instruction new_inst;
418
419 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
420 /* MUL tmpA, src0, src1 */
421 new_inst = tgsi_default_full_instruction();
422 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
423 new_inst.Instruction.NumDstRegs = 1;
424 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
425 new_inst.Instruction.NumSrcRegs = 2;
426 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
427 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
428 tctx->emit_instruction(tctx, &new_inst);
429
430 /* SUB tmpB, imm{1.0}, src0 */
431 new_inst = tgsi_default_full_instruction();
432 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
433 new_inst.Instruction.NumDstRegs = 1;
434 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
435 new_inst.Instruction.NumSrcRegs = 2;
436 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
437 reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
438 tctx->emit_instruction(tctx, &new_inst);
439
440 /* MUL tmpB, tmpB, src2 */
441 new_inst = tgsi_default_full_instruction();
442 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
443 new_inst.Instruction.NumDstRegs = 1;
444 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
445 new_inst.Instruction.NumSrcRegs = 2;
446 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
447 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
448 tctx->emit_instruction(tctx, &new_inst);
449
450 /* ADD dst, tmpA, tmpB */
451 new_inst = tgsi_default_full_instruction();
452 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
453 new_inst.Instruction.NumDstRegs = 1;
454 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
455 new_inst.Instruction.NumSrcRegs = 2;
456 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
457 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
458 tctx->emit_instruction(tctx, &new_inst);
459 }
460 }
461
462 /* FRC - Fraction
463 * dst.x = src.x - \lfloor src.x\rfloor
464 * dst.y = src.y - \lfloor src.y\rfloor
465 * dst.z = src.z - \lfloor src.z\rfloor
466 * dst.w = src.w - \lfloor src.w\rfloor
467 *
468 * ; needs: 1 tmp
469 * FLR tmpA, src
470 * SUB dst, src, tmpA
471 */
472 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
473 #define FRC_TMP 1
474 static void
475 transform_frc(struct tgsi_transform_context *tctx,
476 struct tgsi_full_instruction *inst)
477 {
478 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
479 struct tgsi_full_dst_register *dst = &inst->Dst[0];
480 struct tgsi_full_src_register *src = &inst->Src[0];
481 struct tgsi_full_instruction new_inst;
482
483 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
484 /* FLR tmpA, src */
485 new_inst = tgsi_default_full_instruction();
486 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
487 new_inst.Instruction.NumDstRegs = 1;
488 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
489 new_inst.Instruction.NumSrcRegs = 1;
490 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
491 tctx->emit_instruction(tctx, &new_inst);
492
493 /* SUB dst, src, tmpA */
494 new_inst = tgsi_default_full_instruction();
495 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
496 new_inst.Instruction.NumDstRegs = 1;
497 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
498 new_inst.Instruction.NumSrcRegs = 2;
499 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
500 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
501 tctx->emit_instruction(tctx, &new_inst);
502 }
503 }
504
505 /* POW - Power
506 * dst.x = src0.x^{src1.x}
507 * dst.y = src0.x^{src1.x}
508 * dst.z = src0.x^{src1.x}
509 * dst.w = src0.x^{src1.x}
510 *
511 * ; needs: 1 tmp
512 * LG2 tmpA.x, src0.x
513 * MUL tmpA.x, src1.x, tmpA.x
514 * EX2 dst, tmpA.x
515 */
516 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
517 #define POW_TMP 1
518 static void
519 transform_pow(struct tgsi_transform_context *tctx,
520 struct tgsi_full_instruction *inst)
521 {
522 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
523 struct tgsi_full_dst_register *dst = &inst->Dst[0];
524 struct tgsi_full_src_register *src0 = &inst->Src[0];
525 struct tgsi_full_src_register *src1 = &inst->Src[1];
526 struct tgsi_full_instruction new_inst;
527
528 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
529 /* LG2 tmpA.x, src0.x */
530 new_inst = tgsi_default_full_instruction();
531 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
532 new_inst.Instruction.NumDstRegs = 1;
533 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
534 new_inst.Instruction.NumSrcRegs = 1;
535 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
536 tctx->emit_instruction(tctx, &new_inst);
537
538 /* MUL tmpA.x, src1.x, tmpA.x */
539 new_inst = tgsi_default_full_instruction();
540 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
541 new_inst.Instruction.NumDstRegs = 1;
542 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
543 new_inst.Instruction.NumSrcRegs = 2;
544 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
545 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
546 tctx->emit_instruction(tctx, &new_inst);
547
548 /* EX2 dst, tmpA.x */
549 new_inst = tgsi_default_full_instruction();
550 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
551 new_inst.Instruction.NumDstRegs = 1;
552 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
553 new_inst.Instruction.NumSrcRegs = 1;
554 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
555 tctx->emit_instruction(tctx, &new_inst);
556 }
557 }
558
559 /* LIT - Light Coefficients
560 * dst.x = 1.0
561 * dst.y = max(src.x, 0.0)
562 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
563 * dst.w = 1.0
564 *
565 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
566 * MAX tmpA.xy, src.xy, imm{0.0}
567 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
568 * LG2 tmpA.y, tmpA.y
569 * MUL tmpA.y, tmpA.z, tmpA.y
570 * EX2 tmpA.y, tmpA.y
571 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
572 * MOV dst.yz, tmpA.xy
573 * MOV dst.xw, imm{1.0}
574 */
575 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
576 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
577 #define LIT_TMP 1
578 static void
579 transform_lit(struct tgsi_transform_context *tctx,
580 struct tgsi_full_instruction *inst)
581 {
582 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
583 struct tgsi_full_dst_register *dst = &inst->Dst[0];
584 struct tgsi_full_src_register *src = &inst->Src[0];
585 struct tgsi_full_instruction new_inst;
586
587 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
588 /* MAX tmpA.xy, src.xy, imm{0.0} */
589 new_inst = tgsi_default_full_instruction();
590 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
591 new_inst.Instruction.NumDstRegs = 1;
592 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
593 new_inst.Instruction.NumSrcRegs = 2;
594 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
595 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
596 tctx->emit_instruction(tctx, &new_inst);
597
598 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
599 new_inst = tgsi_default_full_instruction();
600 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
601 new_inst.Instruction.NumDstRegs = 1;
602 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
603 new_inst.Instruction.NumSrcRegs = 3;
604 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
605 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
606 new_inst.Src[1].Register.Negate = true;
607 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
608 tctx->emit_instruction(tctx, &new_inst);
609
610 /* LG2 tmpA.y, tmpA.y */
611 new_inst = tgsi_default_full_instruction();
612 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
613 new_inst.Instruction.NumDstRegs = 1;
614 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
615 new_inst.Instruction.NumSrcRegs = 1;
616 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
617 tctx->emit_instruction(tctx, &new_inst);
618
619 /* MUL tmpA.y, tmpA.z, tmpA.y */
620 new_inst = tgsi_default_full_instruction();
621 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
622 new_inst.Instruction.NumDstRegs = 1;
623 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
624 new_inst.Instruction.NumSrcRegs = 2;
625 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
626 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
627 tctx->emit_instruction(tctx, &new_inst);
628
629 /* EX2 tmpA.y, tmpA.y */
630 new_inst = tgsi_default_full_instruction();
631 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
632 new_inst.Instruction.NumDstRegs = 1;
633 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
634 new_inst.Instruction.NumSrcRegs = 1;
635 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
636 tctx->emit_instruction(tctx, &new_inst);
637
638 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
639 new_inst = tgsi_default_full_instruction();
640 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
641 new_inst.Instruction.NumDstRegs = 1;
642 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
643 new_inst.Instruction.NumSrcRegs = 3;
644 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
645 new_inst.Src[0].Register.Negate = true;
646 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
647 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
648 tctx->emit_instruction(tctx, &new_inst);
649
650 /* MOV dst.yz, tmpA.xy */
651 new_inst = tgsi_default_full_instruction();
652 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
653 new_inst.Instruction.NumDstRegs = 1;
654 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
655 new_inst.Instruction.NumSrcRegs = 1;
656 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
657 tctx->emit_instruction(tctx, &new_inst);
658 }
659
660 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
661 /* MOV dst.xw, imm{1.0} */
662 new_inst = tgsi_default_full_instruction();
663 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
664 new_inst.Instruction.NumDstRegs = 1;
665 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
666 new_inst.Instruction.NumSrcRegs = 1;
667 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
668 tctx->emit_instruction(tctx, &new_inst);
669 }
670 }
671
672 /* EXP - Approximate Exponential Base 2
673 * dst.x = 2^{\lfloor src.x\rfloor}
674 * dst.y = src.x - \lfloor src.x\rfloor
675 * dst.z = 2^{src.x}
676 * dst.w = 1.0
677 *
678 * ; needs: 1 tmp, imm{1.0}
679 * if (lowering FLR) {
680 * FRC tmpA.x, src.x
681 * SUB tmpA.x, src.x, tmpA.x
682 * } else {
683 * FLR tmpA.x, src.x
684 * }
685 * EX2 tmpA.y, src.x
686 * SUB dst.y, src.x, tmpA.x
687 * EX2 dst.x, tmpA.x
688 * MOV dst.z, tmpA.y
689 * MOV dst.w, imm{1.0}
690 */
691 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
692 NINST(1)+ NINST(1) - OINST(1))
693 #define EXP_TMP 1
694 static void
695 transform_exp(struct tgsi_transform_context *tctx,
696 struct tgsi_full_instruction *inst)
697 {
698 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
699 struct tgsi_full_dst_register *dst = &inst->Dst[0];
700 struct tgsi_full_src_register *src = &inst->Src[0];
701 struct tgsi_full_instruction new_inst;
702
703 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
704 if (ctx->config->lower_FLR) {
705 /* FRC tmpA.x, src.x */
706 new_inst = tgsi_default_full_instruction();
707 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
708 new_inst.Instruction.NumDstRegs = 1;
709 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
710 new_inst.Instruction.NumSrcRegs = 1;
711 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
712 tctx->emit_instruction(tctx, &new_inst);
713
714 /* SUB tmpA.x, src.x, tmpA.x */
715 new_inst = tgsi_default_full_instruction();
716 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
717 new_inst.Instruction.NumDstRegs = 1;
718 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
719 new_inst.Instruction.NumSrcRegs = 2;
720 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
721 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
722 tctx->emit_instruction(tctx, &new_inst);
723 } else {
724 /* FLR tmpA.x, src.x */
725 new_inst = tgsi_default_full_instruction();
726 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
727 new_inst.Instruction.NumDstRegs = 1;
728 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
729 new_inst.Instruction.NumSrcRegs = 1;
730 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
731 tctx->emit_instruction(tctx, &new_inst);
732 }
733 }
734
735 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
736 /* EX2 tmpA.y, src.x */
737 new_inst = tgsi_default_full_instruction();
738 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
739 new_inst.Instruction.NumDstRegs = 1;
740 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
741 new_inst.Instruction.NumSrcRegs = 1;
742 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
743 tctx->emit_instruction(tctx, &new_inst);
744 }
745
746 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
747 /* SUB dst.y, src.x, tmpA.x */
748 new_inst = tgsi_default_full_instruction();
749 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
750 new_inst.Instruction.NumDstRegs = 1;
751 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
752 new_inst.Instruction.NumSrcRegs = 2;
753 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
754 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
755 tctx->emit_instruction(tctx, &new_inst);
756 }
757
758 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
759 /* EX2 dst.x, tmpA.x */
760 new_inst = tgsi_default_full_instruction();
761 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
762 new_inst.Instruction.NumDstRegs = 1;
763 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
764 new_inst.Instruction.NumSrcRegs = 1;
765 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
766 tctx->emit_instruction(tctx, &new_inst);
767 }
768
769 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
770 /* MOV dst.z, tmpA.y */
771 new_inst = tgsi_default_full_instruction();
772 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
773 new_inst.Instruction.NumDstRegs = 1;
774 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
775 new_inst.Instruction.NumSrcRegs = 1;
776 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
777 tctx->emit_instruction(tctx, &new_inst);
778 }
779
780 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
781 /* MOV dst.w, imm{1.0} */
782 new_inst = tgsi_default_full_instruction();
783 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
784 new_inst.Instruction.NumDstRegs = 1;
785 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
786 new_inst.Instruction.NumSrcRegs = 1;
787 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
788 tctx->emit_instruction(tctx, &new_inst);
789 }
790 }
791
792 /* LOG - Approximate Logarithm Base 2
793 * dst.x = \lfloor\log_2{|src.x|}\rfloor
794 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
795 * dst.z = \log_2{|src.x|}
796 * dst.w = 1.0
797 *
798 * ; needs: 1 tmp, imm{1.0}
799 * LG2 tmpA.x, |src.x|
800 * if (lowering FLR) {
801 * FRC tmpA.y, tmpA.x
802 * SUB tmpA.y, tmpA.x, tmpA.y
803 * } else {
804 * FLR tmpA.y, tmpA.x
805 * }
806 * EX2 tmpA.z, tmpA.y
807 * RCP tmpA.z, tmpA.z
808 * MUL dst.y, |src.x|, tmpA.z
809 * MOV dst.xz, tmpA.yx
810 * MOV dst.w, imm{1.0}
811 */
812 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
813 NINST(2) + NINST(1) + NINST(1) - OINST(1))
814 #define LOG_TMP 1
815 static void
816 transform_log(struct tgsi_transform_context *tctx,
817 struct tgsi_full_instruction *inst)
818 {
819 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
820 struct tgsi_full_dst_register *dst = &inst->Dst[0];
821 struct tgsi_full_src_register *src = &inst->Src[0];
822 struct tgsi_full_instruction new_inst;
823
824 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
825 /* LG2 tmpA.x, |src.x| */
826 new_inst = tgsi_default_full_instruction();
827 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
828 new_inst.Instruction.NumDstRegs = 1;
829 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
830 new_inst.Instruction.NumSrcRegs = 1;
831 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
832 new_inst.Src[0].Register.Absolute = true;
833 tctx->emit_instruction(tctx, &new_inst);
834 }
835
836 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
837 if (ctx->config->lower_FLR) {
838 /* FRC tmpA.y, tmpA.x */
839 new_inst = tgsi_default_full_instruction();
840 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
841 new_inst.Instruction.NumDstRegs = 1;
842 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
843 new_inst.Instruction.NumSrcRegs = 1;
844 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
845 tctx->emit_instruction(tctx, &new_inst);
846
847 /* SUB tmpA.y, tmpA.x, tmpA.y */
848 new_inst = tgsi_default_full_instruction();
849 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
850 new_inst.Instruction.NumDstRegs = 1;
851 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
852 new_inst.Instruction.NumSrcRegs = 2;
853 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
854 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
855 tctx->emit_instruction(tctx, &new_inst);
856 } else {
857 /* FLR tmpA.y, tmpA.x */
858 new_inst = tgsi_default_full_instruction();
859 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
860 new_inst.Instruction.NumDstRegs = 1;
861 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
862 new_inst.Instruction.NumSrcRegs = 1;
863 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
864 tctx->emit_instruction(tctx, &new_inst);
865 }
866 }
867
868 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
869 /* EX2 tmpA.z, tmpA.y */
870 new_inst = tgsi_default_full_instruction();
871 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
872 new_inst.Instruction.NumDstRegs = 1;
873 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
874 new_inst.Instruction.NumSrcRegs = 1;
875 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
876 tctx->emit_instruction(tctx, &new_inst);
877
878 /* RCP tmpA.z, tmpA.z */
879 new_inst = tgsi_default_full_instruction();
880 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
881 new_inst.Instruction.NumDstRegs = 1;
882 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
883 new_inst.Instruction.NumSrcRegs = 1;
884 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
885 tctx->emit_instruction(tctx, &new_inst);
886
887 /* MUL dst.y, |src.x|, tmpA.z */
888 new_inst = tgsi_default_full_instruction();
889 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
890 new_inst.Instruction.NumDstRegs = 1;
891 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
892 new_inst.Instruction.NumSrcRegs = 2;
893 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
894 new_inst.Src[0].Register.Absolute = true;
895 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
896 tctx->emit_instruction(tctx, &new_inst);
897 }
898
899 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
900 /* MOV dst.xz, tmpA.yx */
901 new_inst = tgsi_default_full_instruction();
902 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
903 new_inst.Instruction.NumDstRegs = 1;
904 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
905 new_inst.Instruction.NumSrcRegs = 1;
906 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
907 tctx->emit_instruction(tctx, &new_inst);
908 }
909
910 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
911 /* MOV dst.w, imm{1.0} */
912 new_inst = tgsi_default_full_instruction();
913 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
914 new_inst.Instruction.NumDstRegs = 1;
915 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
916 new_inst.Instruction.NumSrcRegs = 1;
917 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
918 tctx->emit_instruction(tctx, &new_inst);
919 }
920 }
921
922 /* DP4 - 4-component Dot Product
923 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
924 *
925 * DP3 - 3-component Dot Product
926 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
927 *
928 * DPH - Homogeneous Dot Product
929 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
930 *
931 * DP2 - 2-component Dot Product
932 * dst = src0.x \times src1.x + src0.y \times src1.y
933 *
934 * DP2A - 2-component Dot Product And Add
935 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
936 *
937 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
938 * operations, which is what you'd prefer for a ISA that is natively
939 * scalar. Probably a native vector ISA would at least already have
940 * DP4/DP3 instructions, but perhaps there is room for an alternative
941 * translation for DPH/DP2/DP2A using vector instructions.
942 *
943 * ; needs: 1 tmp
944 * MUL tmpA.x, src0.x, src1.x
945 * MAD tmpA.x, src0.y, src1.y, tmpA.x
946 * if (DPH || DP3 || DP4) {
947 * MAD tmpA.x, src0.z, src1.z, tmpA.x
948 * if (DPH) {
949 * ADD tmpA.x, src1.w, tmpA.x
950 * } else if (DP4) {
951 * MAD tmpA.x, src0.w, src1.w, tmpA.x
952 * }
953 * } else if (DP2A) {
954 * ADD tmpA.x, src2.x, tmpA.x
955 * }
956 * ; fixup last instruction to replicate into dst
957 */
958 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
959 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
960 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
961 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
962 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
963 #define DOTP_TMP 1
964 static void
965 transform_dotp(struct tgsi_transform_context *tctx,
966 struct tgsi_full_instruction *inst)
967 {
968 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
969 struct tgsi_full_dst_register *dst = &inst->Dst[0];
970 struct tgsi_full_src_register *src0 = &inst->Src[0];
971 struct tgsi_full_src_register *src1 = &inst->Src[1];
972 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
973 struct tgsi_full_instruction new_inst;
974 unsigned opcode = inst->Instruction.Opcode;
975
976 /* NOTE: any potential last instruction must replicate src on all
977 * components (since it could be re-written to write to final dst)
978 */
979
980 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
981 /* MUL tmpA.x, src0.x, src1.x */
982 new_inst = tgsi_default_full_instruction();
983 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
984 new_inst.Instruction.NumDstRegs = 1;
985 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
986 new_inst.Instruction.NumSrcRegs = 2;
987 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
988 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
989 tctx->emit_instruction(tctx, &new_inst);
990
991 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
992 new_inst = tgsi_default_full_instruction();
993 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
994 new_inst.Instruction.NumDstRegs = 1;
995 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
996 new_inst.Instruction.NumSrcRegs = 3;
997 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
998 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
999 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1000
1001 if ((opcode == TGSI_OPCODE_DPH) ||
1002 (opcode == TGSI_OPCODE_DP3) ||
1003 (opcode == TGSI_OPCODE_DP4)) {
1004 tctx->emit_instruction(tctx, &new_inst);
1005
1006 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
1007 new_inst = tgsi_default_full_instruction();
1008 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1009 new_inst.Instruction.NumDstRegs = 1;
1010 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1011 new_inst.Instruction.NumSrcRegs = 3;
1012 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
1013 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
1014 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1015
1016 if (opcode == TGSI_OPCODE_DPH) {
1017 tctx->emit_instruction(tctx, &new_inst);
1018
1019 /* ADD tmpA.x, src1.w, tmpA.x */
1020 new_inst = tgsi_default_full_instruction();
1021 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1022 new_inst.Instruction.NumDstRegs = 1;
1023 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1024 new_inst.Instruction.NumSrcRegs = 2;
1025 reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
1026 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1027 } else if (opcode == TGSI_OPCODE_DP4) {
1028 tctx->emit_instruction(tctx, &new_inst);
1029
1030 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
1031 new_inst = tgsi_default_full_instruction();
1032 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
1033 new_inst.Instruction.NumDstRegs = 1;
1034 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1035 new_inst.Instruction.NumSrcRegs = 3;
1036 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
1037 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
1038 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1039 }
1040 } else if (opcode == TGSI_OPCODE_DP2A) {
1041 tctx->emit_instruction(tctx, &new_inst);
1042
1043 /* ADD tmpA.x, src2.x, tmpA.x */
1044 new_inst = tgsi_default_full_instruction();
1045 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1046 new_inst.Instruction.NumDstRegs = 1;
1047 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
1048 new_inst.Instruction.NumSrcRegs = 2;
1049 reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
1050 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
1051 }
1052
1053 /* fixup last instruction to write to dst: */
1054 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1055
1056 tctx->emit_instruction(tctx, &new_inst);
1057 }
1058 }
1059
1060 /* FLR - floor, CEIL - ceil
1061 * ; needs: 1 tmp
1062 * if (CEIL) {
1063 * FRC tmpA, -src
1064 * ADD dst, src, tmpA
1065 * } else {
1066 * FRC tmpA, src
1067 * SUB dst, src, tmpA
1068 * }
1069 */
1070 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
1071 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
1072 #define FLR_TMP 1
1073 #define CEIL_TMP 1
1074 static void
1075 transform_flr_ceil(struct tgsi_transform_context *tctx,
1076 struct tgsi_full_instruction *inst)
1077 {
1078 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1079 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1080 struct tgsi_full_src_register *src0 = &inst->Src[0];
1081 struct tgsi_full_instruction new_inst;
1082 unsigned opcode = inst->Instruction.Opcode;
1083
1084 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1085 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
1086 new_inst = tgsi_default_full_instruction();
1087 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1088 new_inst.Instruction.NumDstRegs = 1;
1089 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1090 new_inst.Instruction.NumSrcRegs = 1;
1091 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1092
1093 if (opcode == TGSI_OPCODE_CEIL)
1094 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
1095 tctx->emit_instruction(tctx, &new_inst);
1096
1097 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
1098 new_inst = tgsi_default_full_instruction();
1099 if (opcode == TGSI_OPCODE_CEIL)
1100 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
1101 else
1102 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
1103 new_inst.Instruction.NumDstRegs = 1;
1104 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1105 new_inst.Instruction.NumSrcRegs = 2;
1106 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1107 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1108 tctx->emit_instruction(tctx, &new_inst);
1109 }
1110 }
1111
1112 /* TRUNC - truncate off fractional part
1113 * dst.x = trunc(src.x)
1114 * dst.y = trunc(src.y)
1115 * dst.z = trunc(src.z)
1116 * dst.w = trunc(src.w)
1117 *
1118 * ; needs: 1 tmp
1119 * if (lower FLR) {
1120 * FRC tmpA, |src|
1121 * SUB tmpA, |src|, tmpA
1122 * } else {
1123 * FLR tmpA, |src|
1124 * }
1125 * CMP dst, src, -tmpA, tmpA
1126 */
1127 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
1128 #define TRUNC_TMP 1
1129 static void
1130 transform_trunc(struct tgsi_transform_context *tctx,
1131 struct tgsi_full_instruction *inst)
1132 {
1133 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1134 struct tgsi_full_dst_register *dst = &inst->Dst[0];
1135 struct tgsi_full_src_register *src0 = &inst->Src[0];
1136 struct tgsi_full_instruction new_inst;
1137
1138 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
1139 if (ctx->config->lower_FLR) {
1140 new_inst = tgsi_default_full_instruction();
1141 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
1142 new_inst.Instruction.NumDstRegs = 1;
1143 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1144 new_inst.Instruction.NumSrcRegs = 1;
1145 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1146 new_inst.Src[0].Register.Absolute = true;
1147 new_inst.Src[0].Register.Negate = false;
1148 tctx->emit_instruction(tctx, &new_inst);
1149
1150 new_inst = tgsi_default_full_instruction();
1151 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
1152 new_inst.Instruction.NumDstRegs = 1;
1153 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1154 new_inst.Instruction.NumSrcRegs = 2;
1155 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1156 new_inst.Src[0].Register.Absolute = true;
1157 new_inst.Src[0].Register.Negate = false;
1158 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1159 tctx->emit_instruction(tctx, &new_inst);
1160 } else {
1161 new_inst = tgsi_default_full_instruction();
1162 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
1163 new_inst.Instruction.NumDstRegs = 1;
1164 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
1165 new_inst.Instruction.NumSrcRegs = 1;
1166 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1167 new_inst.Src[0].Register.Absolute = true;
1168 new_inst.Src[0].Register.Negate = false;
1169 tctx->emit_instruction(tctx, &new_inst);
1170 }
1171
1172 new_inst = tgsi_default_full_instruction();
1173 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1174 new_inst.Instruction.NumDstRegs = 1;
1175 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1176 new_inst.Instruction.NumSrcRegs = 3;
1177 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1178 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1179 new_inst.Src[1].Register.Negate = true;
1180 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1181 tctx->emit_instruction(tctx, &new_inst);
1182 }
1183 }
1184
1185 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1186 * in the case of TXP, the clamping must happen *after* projection, so
1187 * we need to lower TXP to TEX.
1188 *
1189 * MOV tmpA, src0
1190 * if (opc == TXP) {
1191 * ; do perspective division manually before clamping:
1192 * RCP tmpB, tmpA.w
1193 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1194 * opc = TEX;
1195 * }
1196 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1197 * <opc> dst, tmpA, ...
1198 */
1199 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1200 #define SAMP_TMP 2
1201 static int
1202 transform_samp(struct tgsi_transform_context *tctx,
1203 struct tgsi_full_instruction *inst)
1204 {
1205 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1206 struct tgsi_full_src_register *coord = &inst->Src[0];
1207 struct tgsi_full_src_register *samp;
1208 struct tgsi_full_instruction new_inst;
1209 /* mask is clamped coords, pmask is all coords (for projection): */
1210 unsigned mask = 0, pmask = 0, smask;
1211 unsigned tex = inst->Texture.Texture;
1212 unsigned opcode = inst->Instruction.Opcode;
1213 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1214 (ctx->config->lower_TXP & (1 << tex));
1215
1216 if (opcode == TGSI_OPCODE_TXB2) {
1217 samp = &inst->Src[2];
1218 } else {
1219 samp = &inst->Src[1];
1220 }
1221
1222 /* convert sampler # to bitmask to test: */
1223 smask = 1 << samp->Register.Index;
1224
1225 /* check if we actually need to lower this one: */
1226 if (!(ctx->saturate & smask) && !lower_txp)
1227 return -1;
1228
1229 /* figure out which coordinates need saturating:
1230 * - RECT textures should not get saturated
1231 * - array index coords should not get saturated
1232 */
1233 switch (tex) {
1234 case TGSI_TEXTURE_3D:
1235 case TGSI_TEXTURE_CUBE:
1236 case TGSI_TEXTURE_CUBE_ARRAY:
1237 case TGSI_TEXTURE_SHADOWCUBE:
1238 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1239 if (ctx->config->saturate_r & smask)
1240 mask |= TGSI_WRITEMASK_Z;
1241 pmask |= TGSI_WRITEMASK_Z;
1242 /* fallthrough */
1243
1244 case TGSI_TEXTURE_2D:
1245 case TGSI_TEXTURE_2D_ARRAY:
1246 case TGSI_TEXTURE_SHADOW2D:
1247 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1248 case TGSI_TEXTURE_2D_MSAA:
1249 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1250 if (ctx->config->saturate_t & smask)
1251 mask |= TGSI_WRITEMASK_Y;
1252 pmask |= TGSI_WRITEMASK_Y;
1253 /* fallthrough */
1254
1255 case TGSI_TEXTURE_1D:
1256 case TGSI_TEXTURE_1D_ARRAY:
1257 case TGSI_TEXTURE_SHADOW1D:
1258 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1259 if (ctx->config->saturate_s & smask)
1260 mask |= TGSI_WRITEMASK_X;
1261 pmask |= TGSI_WRITEMASK_X;
1262 break;
1263
1264 case TGSI_TEXTURE_RECT:
1265 case TGSI_TEXTURE_SHADOWRECT:
1266 /* we don't saturate, but in case of lower_txp we
1267 * still need to do the perspective divide:
1268 */
1269 pmask = TGSI_WRITEMASK_XY;
1270 break;
1271 }
1272
1273 /* sanity check.. driver could be asking to saturate a non-
1274 * existent coordinate component:
1275 */
1276 if (!mask && !lower_txp)
1277 return -1;
1278
1279 /* MOV tmpA, src0 */
1280 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1281
1282 /* This is a bit sad.. we need to clamp *after* the coords
1283 * are projected, which means lowering TXP to TEX and doing
1284 * the projection ourself. But since I haven't figured out
1285 * how to make the lowering code deliver an electric shock
1286 * to anyone using GL_CLAMP, we must do this instead:
1287 */
1288 if (opcode == TGSI_OPCODE_TXP) {
1289 /* RCP tmpB.x tmpA.w */
1290 new_inst = tgsi_default_full_instruction();
1291 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1292 new_inst.Instruction.NumDstRegs = 1;
1293 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1294 new_inst.Instruction.NumSrcRegs = 1;
1295 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1296 tctx->emit_instruction(tctx, &new_inst);
1297
1298 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1299 new_inst = tgsi_default_full_instruction();
1300 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1301 new_inst.Instruction.NumDstRegs = 1;
1302 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1303 new_inst.Instruction.NumSrcRegs = 2;
1304 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1305 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1306 tctx->emit_instruction(tctx, &new_inst);
1307
1308 opcode = TGSI_OPCODE_TEX;
1309 }
1310
1311 /* MOV_SAT tmpA.<mask>, tmpA */
1312 if (mask) {
1313 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1314 }
1315
1316 /* modify the texture samp instruction to take fixed up coord: */
1317 new_inst = *inst;
1318 new_inst.Instruction.Opcode = opcode;
1319 new_inst.Src[0] = ctx->tmp[A].src;
1320 tctx->emit_instruction(tctx, &new_inst);
1321
1322 return 0;
1323 }
1324
1325 /* Two-sided color emulation:
1326 * For each COLOR input, create a corresponding BCOLOR input, plus
1327 * CMP instruction to select front or back color based on FACE
1328 */
1329 #define TWOSIDE_GROW(n) ( \
1330 2 + /* FACE */ \
1331 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1332 ((n) * 1) + /* TEMP[] */ \
1333 ((n) * NINST(3)) /* CMP instr */ \
1334 )
1335
1336 static void
1337 emit_twoside(struct tgsi_transform_context *tctx)
1338 {
1339 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1340 struct tgsi_shader_info *info = ctx->info;
1341 struct tgsi_full_declaration decl;
1342 struct tgsi_full_instruction new_inst;
1343 unsigned inbase, tmpbase;
1344 int i;
1345
1346 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1347 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1348
1349 /* additional inputs for BCOLOR's */
1350 for (i = 0; i < ctx->two_side_colors; i++) {
1351 unsigned in_idx = ctx->two_side_idx[i];
1352 decl = tgsi_default_full_declaration();
1353 decl.Declaration.File = TGSI_FILE_INPUT;
1354 decl.Declaration.Semantic = true;
1355 decl.Range.First = decl.Range.Last = inbase + i;
1356 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1357 decl.Semantic.Index = info->input_semantic_index[in_idx];
1358 decl.Declaration.Interpolate = true;
1359 decl.Interp.Interpolate = info->input_interpolate[in_idx];
1360 decl.Interp.Location = info->input_interpolate_loc[in_idx];
1361 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1362 tctx->emit_declaration(tctx, &decl);
1363 }
1364
1365 /* additional input for FACE */
1366 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1367 decl = tgsi_default_full_declaration();
1368 decl.Declaration.File = TGSI_FILE_INPUT;
1369 decl.Declaration.Semantic = true;
1370 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1371 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1372 decl.Semantic.Index = 0;
1373 tctx->emit_declaration(tctx, &decl);
1374
1375 ctx->face_idx = decl.Range.First;
1376 }
1377
1378 /* additional temps for COLOR/BCOLOR selection: */
1379 for (i = 0; i < ctx->two_side_colors; i++) {
1380 decl = tgsi_default_full_declaration();
1381 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1382 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1383 tctx->emit_declaration(tctx, &decl);
1384 }
1385
1386 /* and finally additional instructions to select COLOR/BCOLOR: */
1387 for (i = 0; i < ctx->two_side_colors; i++) {
1388 new_inst = tgsi_default_full_instruction();
1389 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1390
1391 new_inst.Instruction.NumDstRegs = 1;
1392 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1393 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1394 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1395
1396 new_inst.Instruction.NumSrcRegs = 3;
1397 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1398 new_inst.Src[0].Register.Index = ctx->face_idx;
1399 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1400 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1401 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1402 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1403 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1404 new_inst.Src[1].Register.Index = inbase + i;
1405 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1406 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1407 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1408 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1409 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1410 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1411 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1412 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1413 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1414 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1415
1416 tctx->emit_instruction(tctx, &new_inst);
1417 }
1418 }
1419
1420 static void
1421 emit_decls(struct tgsi_transform_context *tctx)
1422 {
1423 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1424 struct tgsi_shader_info *info = ctx->info;
1425 struct tgsi_full_declaration decl;
1426 struct tgsi_full_immediate immed;
1427 unsigned tmpbase;
1428 int i;
1429
1430 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1431
1432 ctx->color_base = tmpbase + ctx->numtmp;
1433
1434 /* declare immediate: */
1435 immed = tgsi_default_full_immediate();
1436 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1437 immed.u[0].Float = 0.0;
1438 immed.u[1].Float = 1.0;
1439 immed.u[2].Float = 128.0;
1440 immed.u[3].Float = 0.0;
1441 tctx->emit_immediate(tctx, &immed);
1442
1443 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1444 ctx->imm.Register.Index = info->immediate_count;
1445 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1446 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1447 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1448 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1449
1450 /* declare temp regs: */
1451 for (i = 0; i < ctx->numtmp; i++) {
1452 decl = tgsi_default_full_declaration();
1453 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1454 decl.Range.First = decl.Range.Last = tmpbase + i;
1455 tctx->emit_declaration(tctx, &decl);
1456
1457 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1458 ctx->tmp[i].src.Register.Index = tmpbase + i;
1459 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1460 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1461 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1462 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1463
1464 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1465 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1466 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1467 }
1468
1469 if (ctx->two_side_colors)
1470 emit_twoside(tctx);
1471 }
1472
1473 static void
1474 rename_color_inputs(struct tgsi_lowering_context *ctx,
1475 struct tgsi_full_instruction *inst)
1476 {
1477 unsigned i, j;
1478 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1479 struct tgsi_src_register *src = &inst->Src[i].Register;
1480 if (src->File == TGSI_FILE_INPUT) {
1481 for (j = 0; j < ctx->two_side_colors; j++) {
1482 if (src->Index == ctx->two_side_idx[j]) {
1483 src->File = TGSI_FILE_TEMPORARY;
1484 src->Index = ctx->color_base + j;
1485 break;
1486 }
1487 }
1488 }
1489 }
1490
1491 }
1492
1493 static void
1494 transform_instr(struct tgsi_transform_context *tctx,
1495 struct tgsi_full_instruction *inst)
1496 {
1497 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1498
1499 if (!ctx->emitted_decls) {
1500 emit_decls(tctx);
1501 ctx->emitted_decls = 1;
1502 }
1503
1504 /* if emulating two-sided-color, we need to re-write some
1505 * src registers:
1506 */
1507 if (ctx->two_side_colors)
1508 rename_color_inputs(ctx, inst);
1509
1510 switch (inst->Instruction.Opcode) {
1511 case TGSI_OPCODE_DST:
1512 if (!ctx->config->lower_DST)
1513 goto skip;
1514 transform_dst(tctx, inst);
1515 break;
1516 case TGSI_OPCODE_XPD:
1517 if (!ctx->config->lower_XPD)
1518 goto skip;
1519 transform_xpd(tctx, inst);
1520 break;
1521 case TGSI_OPCODE_SCS:
1522 if (!ctx->config->lower_SCS)
1523 goto skip;
1524 transform_scs(tctx, inst);
1525 break;
1526 case TGSI_OPCODE_LRP:
1527 if (!ctx->config->lower_LRP)
1528 goto skip;
1529 transform_lrp(tctx, inst);
1530 break;
1531 case TGSI_OPCODE_FRC:
1532 if (!ctx->config->lower_FRC)
1533 goto skip;
1534 transform_frc(tctx, inst);
1535 break;
1536 case TGSI_OPCODE_POW:
1537 if (!ctx->config->lower_POW)
1538 goto skip;
1539 transform_pow(tctx, inst);
1540 break;
1541 case TGSI_OPCODE_LIT:
1542 if (!ctx->config->lower_LIT)
1543 goto skip;
1544 transform_lit(tctx, inst);
1545 break;
1546 case TGSI_OPCODE_EXP:
1547 if (!ctx->config->lower_EXP)
1548 goto skip;
1549 transform_exp(tctx, inst);
1550 break;
1551 case TGSI_OPCODE_LOG:
1552 if (!ctx->config->lower_LOG)
1553 goto skip;
1554 transform_log(tctx, inst);
1555 break;
1556 case TGSI_OPCODE_DP4:
1557 if (!ctx->config->lower_DP4)
1558 goto skip;
1559 transform_dotp(tctx, inst);
1560 break;
1561 case TGSI_OPCODE_DP3:
1562 if (!ctx->config->lower_DP3)
1563 goto skip;
1564 transform_dotp(tctx, inst);
1565 break;
1566 case TGSI_OPCODE_DPH:
1567 if (!ctx->config->lower_DPH)
1568 goto skip;
1569 transform_dotp(tctx, inst);
1570 break;
1571 case TGSI_OPCODE_DP2:
1572 if (!ctx->config->lower_DP2)
1573 goto skip;
1574 transform_dotp(tctx, inst);
1575 break;
1576 case TGSI_OPCODE_DP2A:
1577 if (!ctx->config->lower_DP2A)
1578 goto skip;
1579 transform_dotp(tctx, inst);
1580 break;
1581 case TGSI_OPCODE_FLR:
1582 if (!ctx->config->lower_FLR)
1583 goto skip;
1584 transform_flr_ceil(tctx, inst);
1585 break;
1586 case TGSI_OPCODE_CEIL:
1587 if (!ctx->config->lower_CEIL)
1588 goto skip;
1589 transform_flr_ceil(tctx, inst);
1590 break;
1591 case TGSI_OPCODE_TRUNC:
1592 if (!ctx->config->lower_TRUNC)
1593 goto skip;
1594 transform_trunc(tctx, inst);
1595 break;
1596 case TGSI_OPCODE_TEX:
1597 case TGSI_OPCODE_TXP:
1598 case TGSI_OPCODE_TXB:
1599 case TGSI_OPCODE_TXB2:
1600 case TGSI_OPCODE_TXL:
1601 if (transform_samp(tctx, inst))
1602 goto skip;
1603 break;
1604 default:
1605 skip:
1606 tctx->emit_instruction(tctx, inst);
1607 break;
1608 }
1609 }
1610
1611 /* returns NULL if no lowering required, else returns the new
1612 * tokens (which caller is required to free()). In either case
1613 * returns the current info.
1614 */
1615 const struct tgsi_token *
1616 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1617 const struct tgsi_token *tokens,
1618 struct tgsi_shader_info *info)
1619 {
1620 struct tgsi_lowering_context ctx;
1621 struct tgsi_token *newtoks;
1622 int newlen, numtmp;
1623
1624 /* sanity check in case limit is ever increased: */
1625 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1626
1627 /* sanity check the lowering */
1628 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1629 assert(!(config->lower_FRC && config->lower_TRUNC));
1630
1631 memset(&ctx, 0, sizeof(ctx));
1632 ctx.base.transform_instruction = transform_instr;
1633 ctx.info = info;
1634 ctx.config = config;
1635
1636 tgsi_scan_shader(tokens, info);
1637
1638 /* if we are adding fragment shader support to emulate two-sided
1639 * color, then figure out the number of additional inputs we need
1640 * to create for BCOLOR's..
1641 */
1642 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1643 config->color_two_side) {
1644 int i;
1645 ctx.face_idx = -1;
1646 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1647 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1648 ctx.two_side_idx[ctx.two_side_colors++] = i;
1649 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1650 ctx.face_idx = i;
1651 }
1652 }
1653
1654 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1655
1656 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1657 /* if there are no instructions to lower, then we are done: */
1658 if (!(OPCS(DST) ||
1659 OPCS(XPD) ||
1660 OPCS(SCS) ||
1661 OPCS(LRP) ||
1662 OPCS(FRC) ||
1663 OPCS(POW) ||
1664 OPCS(LIT) ||
1665 OPCS(EXP) ||
1666 OPCS(LOG) ||
1667 OPCS(DP4) ||
1668 OPCS(DP3) ||
1669 OPCS(DPH) ||
1670 OPCS(DP2) ||
1671 OPCS(DP2A) ||
1672 OPCS(FLR) ||
1673 OPCS(CEIL) ||
1674 OPCS(TRUNC) ||
1675 OPCS(TXP) ||
1676 ctx.two_side_colors ||
1677 ctx.saturate))
1678 return NULL;
1679
1680 #if 0 /* debug */
1681 _debug_printf("BEFORE:");
1682 tgsi_dump(tokens, 0);
1683 #endif
1684
1685 numtmp = 0;
1686 newlen = tgsi_num_tokens(tokens);
1687 if (OPCS(DST)) {
1688 newlen += DST_GROW * OPCS(DST);
1689 numtmp = MAX2(numtmp, DST_TMP);
1690 }
1691 if (OPCS(XPD)) {
1692 newlen += XPD_GROW * OPCS(XPD);
1693 numtmp = MAX2(numtmp, XPD_TMP);
1694 }
1695 if (OPCS(SCS)) {
1696 newlen += SCS_GROW * OPCS(SCS);
1697 numtmp = MAX2(numtmp, SCS_TMP);
1698 }
1699 if (OPCS(LRP)) {
1700 newlen += LRP_GROW * OPCS(LRP);
1701 numtmp = MAX2(numtmp, LRP_TMP);
1702 }
1703 if (OPCS(FRC)) {
1704 newlen += FRC_GROW * OPCS(FRC);
1705 numtmp = MAX2(numtmp, FRC_TMP);
1706 }
1707 if (OPCS(POW)) {
1708 newlen += POW_GROW * OPCS(POW);
1709 numtmp = MAX2(numtmp, POW_TMP);
1710 }
1711 if (OPCS(LIT)) {
1712 newlen += LIT_GROW * OPCS(LIT);
1713 numtmp = MAX2(numtmp, LIT_TMP);
1714 }
1715 if (OPCS(EXP)) {
1716 newlen += EXP_GROW * OPCS(EXP);
1717 numtmp = MAX2(numtmp, EXP_TMP);
1718 }
1719 if (OPCS(LOG)) {
1720 newlen += LOG_GROW * OPCS(LOG);
1721 numtmp = MAX2(numtmp, LOG_TMP);
1722 }
1723 if (OPCS(DP4)) {
1724 newlen += DP4_GROW * OPCS(DP4);
1725 numtmp = MAX2(numtmp, DOTP_TMP);
1726 }
1727 if (OPCS(DP3)) {
1728 newlen += DP3_GROW * OPCS(DP3);
1729 numtmp = MAX2(numtmp, DOTP_TMP);
1730 }
1731 if (OPCS(DPH)) {
1732 newlen += DPH_GROW * OPCS(DPH);
1733 numtmp = MAX2(numtmp, DOTP_TMP);
1734 }
1735 if (OPCS(DP2)) {
1736 newlen += DP2_GROW * OPCS(DP2);
1737 numtmp = MAX2(numtmp, DOTP_TMP);
1738 }
1739 if (OPCS(DP2A)) {
1740 newlen += DP2A_GROW * OPCS(DP2A);
1741 numtmp = MAX2(numtmp, DOTP_TMP);
1742 }
1743 if (OPCS(FLR)) {
1744 newlen += FLR_GROW * OPCS(FLR);
1745 numtmp = MAX2(numtmp, FLR_TMP);
1746 }
1747 if (OPCS(CEIL)) {
1748 newlen += CEIL_GROW * OPCS(CEIL);
1749 numtmp = MAX2(numtmp, CEIL_TMP);
1750 }
1751 if (OPCS(TRUNC)) {
1752 newlen += TRUNC_GROW * OPCS(TRUNC);
1753 numtmp = MAX2(numtmp, TRUNC_TMP);
1754 }
1755 if (ctx.saturate || config->lower_TXP) {
1756 int n = 0;
1757
1758 if (ctx.saturate) {
1759 n = info->opcode_count[TGSI_OPCODE_TEX] +
1760 info->opcode_count[TGSI_OPCODE_TXP] +
1761 info->opcode_count[TGSI_OPCODE_TXB] +
1762 info->opcode_count[TGSI_OPCODE_TXB2] +
1763 info->opcode_count[TGSI_OPCODE_TXL];
1764 } else if (config->lower_TXP) {
1765 n = info->opcode_count[TGSI_OPCODE_TXP];
1766 }
1767
1768 newlen += SAMP_GROW * n;
1769 numtmp = MAX2(numtmp, SAMP_TMP);
1770 }
1771
1772 /* specifically don't include two_side_colors temps in the count: */
1773 ctx.numtmp = numtmp;
1774
1775 if (ctx.two_side_colors) {
1776 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1777 /* note: we permanently consume temp regs, re-writing references
1778 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1779 * instruction that selects which varying to use):
1780 */
1781 numtmp += ctx.two_side_colors;
1782 }
1783
1784 newlen += 2 * numtmp;
1785 newlen += 5; /* immediate */
1786
1787 newtoks = tgsi_alloc_tokens(newlen);
1788 if (!newtoks)
1789 return NULL;
1790
1791 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1792
1793 tgsi_scan_shader(newtoks, info);
1794
1795 #if 0 /* debug */
1796 _debug_printf("AFTER:");
1797 tgsi_dump(newtoks, 0);
1798 #endif
1799
1800 return newtoks;
1801 }