tgsi/lowering: add support to lower TXP (v2)
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33
34 #include "tgsi_lowering.h"
35
36 struct tgsi_lowering_context {
37 struct tgsi_transform_context base;
38 const struct tgsi_lowering_config *config;
39 struct tgsi_shader_info *info;
40 unsigned two_side_colors;
41 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
42 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
43 int face_idx;
44 unsigned numtmp;
45 struct {
46 struct tgsi_full_src_register src;
47 struct tgsi_full_dst_register dst;
48 } tmp[2];
49 #define A 0
50 #define B 1
51 struct tgsi_full_src_register imm;
52 int emitted_decls;
53 unsigned saturate;
54 };
55
/* Recover the lowering context from the transform context embedded in it. */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
61
62 /*
63 * Utility helpers:
64 */
65
66 static void
67 reg_dst(struct tgsi_full_dst_register *dst,
68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70 *dst = *orig_dst;
71 dst->Register.WriteMask &= wrmask;
72 assert(dst->Register.WriteMask);
73 }
74
75 static inline void
76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78 swiz[0] = src->SwizzleX;
79 swiz[1] = src->SwizzleY;
80 swiz[2] = src->SwizzleZ;
81 swiz[3] = src->SwizzleW;
82 }
83
84 static void
85 reg_src(struct tgsi_full_src_register *src,
86 const struct tgsi_full_src_register *orig_src,
87 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89 unsigned swiz[4];
90 get_swiz(swiz, &orig_src->Register);
91 *src = *orig_src;
92 src->Register.SwizzleX = swiz[sx];
93 src->Register.SwizzleY = swiz[sy];
94 src->Register.SwizzleZ = swiz[sz];
95 src->Register.SwizzleW = swiz[sw];
96 }
97
98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
100 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101
102 /*
103 * if (dst.x aliases src.x) {
104 * MOV tmpA.x, src.x
105 * src = tmpA
106 * }
107 * COS dst.x, src.x
108 * SIN dst.y, src.x
109 * MOV dst.zw, imm{0.0, 1.0}
110 */
111 static bool
112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115 if ((dst->Register.File == src->Register.File) &&
116 (dst->Register.Index == src->Register.Index)) {
117 unsigned i, actual_mask = 0;
118 unsigned swiz[4];
119 get_swiz(swiz, &src->Register);
120 for (i = 0; i < 4; i++)
121 if (src_mask & (1 << i))
122 actual_mask |= (1 << swiz[i]);
123 if (actual_mask & dst_mask)
124 return true;
125 }
126 return false;
127 }
128
129 static void
130 create_mov(struct tgsi_transform_context *tctx,
131 const struct tgsi_full_dst_register *dst,
132 const struct tgsi_full_src_register *src,
133 unsigned mask, unsigned saturate)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.Saturate = saturate;
140 new_inst.Instruction.NumDstRegs = 1;
141 reg_dst(&new_inst.Dst[0], dst, mask);
142 new_inst.Instruction.NumSrcRegs = 1;
143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144 tctx->emit_instruction(tctx, &new_inst);
145 }
146
147 /* to help calculate # of tgsi tokens for a lowering.. we assume
148 * the worst case, ie. removed instructions don't have ADDR[] or
149 * anything which increases the # of tokens per src/dst and the
150 * inserted instructions do.
151 *
152 * OINST() - old instruction
153 * 1 : instruction itself
154 * 1 : dst
155 * 1 * nargs : srcN
156 *
157 * NINST() - new instruction
158 * 1 : instruction itself
159 * 2 : dst
160 * 2 * nargs : srcN
161 */
162
/* instruction token + one dst token + one token per source */
#define OINST(nargs)   (2 + (nargs))
/* instruction token + two dst tokens + two tokens per source */
#define NINST(nargs)   (3 + 2 * (nargs))
165
166 /*
167 * Lowering Translators:
168 */
169
170 /* DST - Distance Vector
171 * dst.x = 1.0
172 * dst.y = src0.y \times src1.y
173 * dst.z = src0.z
174 * dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
181 * src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
185 * src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP 2
195 static void
196 transform_dst(struct tgsi_transform_context *tctx,
197 struct tgsi_full_instruction *inst)
198 {
199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200 struct tgsi_full_dst_register *dst = &inst->Dst[0];
201 struct tgsi_full_src_register *src0 = &inst->Src[0];
202 struct tgsi_full_src_register *src1 = &inst->Src[1];
203 struct tgsi_full_instruction new_inst;
204
205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207 src0 = &ctx->tmp[A].src;
208 }
209
210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212 src1 = &ctx->tmp[B].src;
213 }
214
215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst = tgsi_default_full_instruction();
218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219 new_inst.Instruction.NumDstRegs = 1;
220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221 new_inst.Instruction.NumSrcRegs = 2;
222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224 tctx->emit_instruction(tctx, &new_inst);
225 }
226
227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228 /* MOV dst.z, src0.z */
229 new_inst = tgsi_default_full_instruction();
230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231 new_inst.Instruction.NumDstRegs = 1;
232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233 new_inst.Instruction.NumSrcRegs = 1;
234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235 tctx->emit_instruction(tctx, &new_inst);
236 }
237
238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239 /* MOV dst.w, src1.w */
240 new_inst = tgsi_default_full_instruction();
241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242 new_inst.Instruction.NumDstRegs = 1;
243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244 new_inst.Instruction.NumSrcRegs = 1;
245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246 tctx->emit_instruction(tctx, &new_inst);
247 }
248
249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst = tgsi_default_full_instruction();
252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253 new_inst.Instruction.NumDstRegs = 1;
254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255 new_inst.Instruction.NumSrcRegs = 1;
256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257 tctx->emit_instruction(tctx, &new_inst);
258 }
259 }
260
261 /* XPD - Cross Product
262 * dst.x = src0.y \times src1.z - src1.y \times src0.z
263 * dst.y = src0.z \times src1.x - src1.z \times src0.x
264 * dst.z = src0.x \times src1.y - src1.x \times src0.y
265 * dst.w = 1.0
266 *
267 * ; needs: 2 tmp, imm{1.0}
268 * MUL tmpA.xyz, src0.yzx, src1.zxy
269 * MUL tmpB.xyz, src1.yzx, src0.zxy
270 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
271 * MOV dst.w, imm{1.0}
272 */
273 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
274 #define XPD_TMP 2
275 static void
276 transform_xpd(struct tgsi_transform_context *tctx,
277 struct tgsi_full_instruction *inst)
278 {
279 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
280 struct tgsi_full_dst_register *dst = &inst->Dst[0];
281 struct tgsi_full_src_register *src0 = &inst->Src[0];
282 struct tgsi_full_src_register *src1 = &inst->Src[1];
283 struct tgsi_full_instruction new_inst;
284
285 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
286 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
287 new_inst = tgsi_default_full_instruction();
288 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
289 new_inst.Instruction.NumDstRegs = 1;
290 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
291 new_inst.Instruction.NumSrcRegs = 2;
292 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
293 reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
294 tctx->emit_instruction(tctx, &new_inst);
295
296 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
297 new_inst = tgsi_default_full_instruction();
298 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
299 new_inst.Instruction.NumDstRegs = 1;
300 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
301 new_inst.Instruction.NumSrcRegs = 2;
302 reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
303 reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
304 tctx->emit_instruction(tctx, &new_inst);
305
306 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
307 new_inst = tgsi_default_full_instruction();
308 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
309 new_inst.Instruction.NumDstRegs = 1;
310 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
311 new_inst.Instruction.NumSrcRegs = 2;
312 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
313 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _));
314 tctx->emit_instruction(tctx, &new_inst);
315 }
316
317 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
318 /* MOV dst.w, imm{1.0} */
319 new_inst = tgsi_default_full_instruction();
320 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
321 new_inst.Instruction.NumDstRegs = 1;
322 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
323 new_inst.Instruction.NumSrcRegs = 1;
324 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
325 tctx->emit_instruction(tctx, &new_inst);
326 }
327 }
328
329 /* SCS - Sine Cosine
330 * dst.x = \cos{src.x}
331 * dst.y = \sin{src.x}
332 * dst.z = 0.0
333 * dst.w = 1.0
334 *
335 * ; needs: 1 tmp, imm{0.0, 1.0}
336 * if (dst.x aliases src.x) {
337 * MOV tmpA.x, src.x
338 * src = tmpA
339 * }
340 * COS dst.x, src.x
341 * SIN dst.y, src.x
342 * MOV dst.zw, imm{0.0, 1.0}
343 */
344 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
345 #define SCS_TMP 1
346 static void
347 transform_scs(struct tgsi_transform_context *tctx,
348 struct tgsi_full_instruction *inst)
349 {
350 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
351 struct tgsi_full_dst_register *dst = &inst->Dst[0];
352 struct tgsi_full_src_register *src = &inst->Src[0];
353 struct tgsi_full_instruction new_inst;
354
355 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
356 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
357 src = &ctx->tmp[A].src;
358 }
359
360 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
361 /* COS dst.x, src.x */
362 new_inst = tgsi_default_full_instruction();
363 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
364 new_inst.Instruction.NumDstRegs = 1;
365 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
366 new_inst.Instruction.NumSrcRegs = 1;
367 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
368 tctx->emit_instruction(tctx, &new_inst);
369 }
370
371 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
372 /* SIN dst.y, src.x */
373 new_inst = tgsi_default_full_instruction();
374 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
375 new_inst.Instruction.NumDstRegs = 1;
376 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
377 new_inst.Instruction.NumSrcRegs = 1;
378 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
379 tctx->emit_instruction(tctx, &new_inst);
380 }
381
382 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
383 /* MOV dst.zw, imm{0.0, 1.0} */
384 new_inst = tgsi_default_full_instruction();
385 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
386 new_inst.Instruction.NumDstRegs = 1;
387 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
388 new_inst.Instruction.NumSrcRegs = 1;
389 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
390 tctx->emit_instruction(tctx, &new_inst);
391 }
392 }
393
394 /* LRP - Linear Interpolate
395 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
396 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
397 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
398 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
399 *
400 * ; needs: 2 tmp, imm{1.0}
401 * MUL tmpA, src0, src1
402 * SUB tmpB, imm{1.0}, src0
403 * MUL tmpB, tmpB, src2
404 * ADD dst, tmpA, tmpB
405 */
406 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
407 #define LRP_TMP 2
408 static void
409 transform_lrp(struct tgsi_transform_context *tctx,
410 struct tgsi_full_instruction *inst)
411 {
412 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
413 struct tgsi_full_dst_register *dst = &inst->Dst[0];
414 struct tgsi_full_src_register *src0 = &inst->Src[0];
415 struct tgsi_full_src_register *src1 = &inst->Src[1];
416 struct tgsi_full_src_register *src2 = &inst->Src[2];
417 struct tgsi_full_instruction new_inst;
418
419 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
420 /* MUL tmpA, src0, src1 */
421 new_inst = tgsi_default_full_instruction();
422 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
423 new_inst.Instruction.NumDstRegs = 1;
424 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
425 new_inst.Instruction.NumSrcRegs = 2;
426 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
427 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
428 tctx->emit_instruction(tctx, &new_inst);
429
430 /* SUB tmpB, imm{1.0}, src0 */
431 new_inst = tgsi_default_full_instruction();
432 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
433 new_inst.Instruction.NumDstRegs = 1;
434 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
435 new_inst.Instruction.NumSrcRegs = 2;
436 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
437 reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
438 tctx->emit_instruction(tctx, &new_inst);
439
440 /* MUL tmpB, tmpB, src2 */
441 new_inst = tgsi_default_full_instruction();
442 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
443 new_inst.Instruction.NumDstRegs = 1;
444 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
445 new_inst.Instruction.NumSrcRegs = 2;
446 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
447 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
448 tctx->emit_instruction(tctx, &new_inst);
449
450 /* ADD dst, tmpA, tmpB */
451 new_inst = tgsi_default_full_instruction();
452 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
453 new_inst.Instruction.NumDstRegs = 1;
454 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
455 new_inst.Instruction.NumSrcRegs = 2;
456 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
457 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
458 tctx->emit_instruction(tctx, &new_inst);
459 }
460 }
461
462 /* FRC - Fraction
463 * dst.x = src.x - \lfloor src.x\rfloor
464 * dst.y = src.y - \lfloor src.y\rfloor
465 * dst.z = src.z - \lfloor src.z\rfloor
466 * dst.w = src.w - \lfloor src.w\rfloor
467 *
468 * ; needs: 1 tmp
469 * FLR tmpA, src
470 * SUB dst, src, tmpA
471 */
472 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
473 #define FRC_TMP 1
474 static void
475 transform_frc(struct tgsi_transform_context *tctx,
476 struct tgsi_full_instruction *inst)
477 {
478 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
479 struct tgsi_full_dst_register *dst = &inst->Dst[0];
480 struct tgsi_full_src_register *src = &inst->Src[0];
481 struct tgsi_full_instruction new_inst;
482
483 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
484 /* FLR tmpA, src */
485 new_inst = tgsi_default_full_instruction();
486 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
487 new_inst.Instruction.NumDstRegs = 1;
488 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
489 new_inst.Instruction.NumSrcRegs = 1;
490 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
491 tctx->emit_instruction(tctx, &new_inst);
492
493 /* SUB dst, src, tmpA */
494 new_inst = tgsi_default_full_instruction();
495 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
496 new_inst.Instruction.NumDstRegs = 1;
497 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
498 new_inst.Instruction.NumSrcRegs = 2;
499 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
500 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
501 tctx->emit_instruction(tctx, &new_inst);
502 }
503 }
504
505 /* POW - Power
506 * dst.x = src0.x^{src1.x}
507 * dst.y = src0.x^{src1.x}
508 * dst.z = src0.x^{src1.x}
509 * dst.w = src0.x^{src1.x}
510 *
511 * ; needs: 1 tmp
512 * LG2 tmpA.x, src0.x
513 * MUL tmpA.x, src1.x, tmpA.x
514 * EX2 dst, tmpA.x
515 */
516 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
517 #define POW_TMP 1
518 static void
519 transform_pow(struct tgsi_transform_context *tctx,
520 struct tgsi_full_instruction *inst)
521 {
522 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
523 struct tgsi_full_dst_register *dst = &inst->Dst[0];
524 struct tgsi_full_src_register *src0 = &inst->Src[0];
525 struct tgsi_full_src_register *src1 = &inst->Src[1];
526 struct tgsi_full_instruction new_inst;
527
528 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
529 /* LG2 tmpA.x, src0.x */
530 new_inst = tgsi_default_full_instruction();
531 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
532 new_inst.Instruction.NumDstRegs = 1;
533 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
534 new_inst.Instruction.NumSrcRegs = 1;
535 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
536 tctx->emit_instruction(tctx, &new_inst);
537
538 /* MUL tmpA.x, src1.x, tmpA.x */
539 new_inst = tgsi_default_full_instruction();
540 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
541 new_inst.Instruction.NumDstRegs = 1;
542 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
543 new_inst.Instruction.NumSrcRegs = 2;
544 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
545 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
546 tctx->emit_instruction(tctx, &new_inst);
547
548 /* EX2 dst, tmpA.x */
549 new_inst = tgsi_default_full_instruction();
550 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
551 new_inst.Instruction.NumDstRegs = 1;
552 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
553 new_inst.Instruction.NumSrcRegs = 1;
554 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
555 tctx->emit_instruction(tctx, &new_inst);
556 }
557 }
558
559 /* LIT - Light Coefficients
560 * dst.x = 1.0
561 * dst.y = max(src.x, 0.0)
562 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
563 * dst.w = 1.0
564 *
565 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
566 * MAX tmpA.xy, src.xy, imm{0.0}
567 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
568 * LG2 tmpA.y, tmpA.y
569 * MUL tmpA.y, tmpA.z, tmpA.y
570 * EX2 tmpA.y, tmpA.y
571 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
572 * MOV dst.yz, tmpA.xy
573 * MOV dst.xw, imm{1.0}
574 */
575 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
576 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
577 #define LIT_TMP 1
578 static void
579 transform_lit(struct tgsi_transform_context *tctx,
580 struct tgsi_full_instruction *inst)
581 {
582 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
583 struct tgsi_full_dst_register *dst = &inst->Dst[0];
584 struct tgsi_full_src_register *src = &inst->Src[0];
585 struct tgsi_full_instruction new_inst;
586
587 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
588 /* MAX tmpA.xy, src.xy, imm{0.0} */
589 new_inst = tgsi_default_full_instruction();
590 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
591 new_inst.Instruction.NumDstRegs = 1;
592 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
593 new_inst.Instruction.NumSrcRegs = 2;
594 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
595 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
596 tctx->emit_instruction(tctx, &new_inst);
597
598 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
599 new_inst = tgsi_default_full_instruction();
600 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
601 new_inst.Instruction.NumDstRegs = 1;
602 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
603 new_inst.Instruction.NumSrcRegs = 3;
604 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
605 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
606 new_inst.Src[1].Register.Negate = true;
607 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
608 tctx->emit_instruction(tctx, &new_inst);
609
610 /* LG2 tmpA.y, tmpA.y */
611 new_inst = tgsi_default_full_instruction();
612 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
613 new_inst.Instruction.NumDstRegs = 1;
614 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
615 new_inst.Instruction.NumSrcRegs = 1;
616 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
617 tctx->emit_instruction(tctx, &new_inst);
618
619 /* MUL tmpA.y, tmpA.z, tmpA.y */
620 new_inst = tgsi_default_full_instruction();
621 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
622 new_inst.Instruction.NumDstRegs = 1;
623 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
624 new_inst.Instruction.NumSrcRegs = 2;
625 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
626 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
627 tctx->emit_instruction(tctx, &new_inst);
628
629 /* EX2 tmpA.y, tmpA.y */
630 new_inst = tgsi_default_full_instruction();
631 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
632 new_inst.Instruction.NumDstRegs = 1;
633 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
634 new_inst.Instruction.NumSrcRegs = 1;
635 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
636 tctx->emit_instruction(tctx, &new_inst);
637
638 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
639 new_inst = tgsi_default_full_instruction();
640 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
641 new_inst.Instruction.NumDstRegs = 1;
642 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
643 new_inst.Instruction.NumSrcRegs = 3;
644 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
645 new_inst.Src[0].Register.Negate = true;
646 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
647 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
648 tctx->emit_instruction(tctx, &new_inst);
649
650 /* MOV dst.yz, tmpA.xy */
651 new_inst = tgsi_default_full_instruction();
652 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
653 new_inst.Instruction.NumDstRegs = 1;
654 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
655 new_inst.Instruction.NumSrcRegs = 1;
656 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
657 tctx->emit_instruction(tctx, &new_inst);
658 }
659
660 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
661 /* MOV dst.xw, imm{1.0} */
662 new_inst = tgsi_default_full_instruction();
663 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
664 new_inst.Instruction.NumDstRegs = 1;
665 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
666 new_inst.Instruction.NumSrcRegs = 1;
667 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
668 tctx->emit_instruction(tctx, &new_inst);
669 }
670 }
671
672 /* EXP - Approximate Exponential Base 2
673 * dst.x = 2^{\lfloor src.x\rfloor}
674 * dst.y = src.x - \lfloor src.x\rfloor
675 * dst.z = 2^{src.x}
676 * dst.w = 1.0
677 *
678 * ; needs: 1 tmp, imm{1.0}
679 * FLR tmpA.x, src.x
680 * EX2 tmpA.y, src.x
681 * SUB dst.y, src.x, tmpA.x
682 * EX2 dst.x, tmpA.x
683 * MOV dst.z, tmpA.y
684 * MOV dst.w, imm{1.0}
685 */
686 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
687 NINST(1)+ NINST(1) - OINST(1))
688 #define EXP_TMP 1
689 static void
690 transform_exp(struct tgsi_transform_context *tctx,
691 struct tgsi_full_instruction *inst)
692 {
693 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
694 struct tgsi_full_dst_register *dst = &inst->Dst[0];
695 struct tgsi_full_src_register *src = &inst->Src[0];
696 struct tgsi_full_instruction new_inst;
697
698 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
699 /* FLR tmpA.x, src.x */
700 new_inst = tgsi_default_full_instruction();
701 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
702 new_inst.Instruction.NumDstRegs = 1;
703 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
704 new_inst.Instruction.NumSrcRegs = 1;
705 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
706 tctx->emit_instruction(tctx, &new_inst);
707 }
708
709 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
710 /* EX2 tmpA.y, src.x */
711 new_inst = tgsi_default_full_instruction();
712 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
713 new_inst.Instruction.NumDstRegs = 1;
714 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
715 new_inst.Instruction.NumSrcRegs = 1;
716 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
717 tctx->emit_instruction(tctx, &new_inst);
718 }
719
720 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
721 /* SUB dst.y, src.x, tmpA.x */
722 new_inst = tgsi_default_full_instruction();
723 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
724 new_inst.Instruction.NumDstRegs = 1;
725 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
726 new_inst.Instruction.NumSrcRegs = 2;
727 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
728 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
729 tctx->emit_instruction(tctx, &new_inst);
730 }
731
732 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
733 /* EX2 dst.x, tmpA.x */
734 new_inst = tgsi_default_full_instruction();
735 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
736 new_inst.Instruction.NumDstRegs = 1;
737 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
738 new_inst.Instruction.NumSrcRegs = 1;
739 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
740 tctx->emit_instruction(tctx, &new_inst);
741 }
742
743 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
744 /* MOV dst.z, tmpA.y */
745 new_inst = tgsi_default_full_instruction();
746 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
747 new_inst.Instruction.NumDstRegs = 1;
748 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
749 new_inst.Instruction.NumSrcRegs = 1;
750 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
751 tctx->emit_instruction(tctx, &new_inst);
752 }
753
754 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
755 /* MOV dst.w, imm{1.0} */
756 new_inst = tgsi_default_full_instruction();
757 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
758 new_inst.Instruction.NumDstRegs = 1;
759 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
760 new_inst.Instruction.NumSrcRegs = 1;
761 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
762 tctx->emit_instruction(tctx, &new_inst);
763 }
764 }
765
766 /* LOG - Approximate Logarithm Base 2
767 * dst.x = \lfloor\log_2{|src.x|}\rfloor
768 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
769 * dst.z = \log_2{|src.x|}
770 * dst.w = 1.0
771 *
772 * ; needs: 1 tmp, imm{1.0}
773 * LG2 tmpA.x, |src.x|
774 * FLR tmpA.y, tmpA.x
775 * EX2 tmpA.z, tmpA.y
776 * RCP tmpA.z, tmpA.z
777 * MUL dst.y, |src.x|, tmpA.z
778 * MOV dst.xz, tmpA.yx
779 * MOV dst.w, imm{1.0}
780 */
781 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
782 NINST(2) + NINST(1) + NINST(1) - OINST(1))
783 #define LOG_TMP 1
784 static void
785 transform_log(struct tgsi_transform_context *tctx,
786 struct tgsi_full_instruction *inst)
787 {
788 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
789 struct tgsi_full_dst_register *dst = &inst->Dst[0];
790 struct tgsi_full_src_register *src = &inst->Src[0];
791 struct tgsi_full_instruction new_inst;
792
793 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
794 /* LG2 tmpA.x, |src.x| */
795 new_inst = tgsi_default_full_instruction();
796 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
797 new_inst.Instruction.NumDstRegs = 1;
798 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
799 new_inst.Instruction.NumSrcRegs = 1;
800 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
801 new_inst.Src[0].Register.Absolute = true;
802 tctx->emit_instruction(tctx, &new_inst);
803 }
804
805 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
806 /* FLR tmpA.y, tmpA.x */
807 new_inst = tgsi_default_full_instruction();
808 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
809 new_inst.Instruction.NumDstRegs = 1;
810 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
811 new_inst.Instruction.NumSrcRegs = 1;
812 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
813 tctx->emit_instruction(tctx, &new_inst);
814 }
815
816 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
817 /* EX2 tmpA.z, tmpA.y */
818 new_inst = tgsi_default_full_instruction();
819 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
820 new_inst.Instruction.NumDstRegs = 1;
821 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
822 new_inst.Instruction.NumSrcRegs = 1;
823 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
824 tctx->emit_instruction(tctx, &new_inst);
825
826 /* RCP tmpA.z, tmpA.z */
827 new_inst = tgsi_default_full_instruction();
828 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
829 new_inst.Instruction.NumDstRegs = 1;
830 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
831 new_inst.Instruction.NumSrcRegs = 1;
832 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
833 tctx->emit_instruction(tctx, &new_inst);
834
835 /* MUL dst.y, |src.x|, tmpA.z */
836 new_inst = tgsi_default_full_instruction();
837 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
838 new_inst.Instruction.NumDstRegs = 1;
839 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
840 new_inst.Instruction.NumSrcRegs = 2;
841 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
842 new_inst.Src[0].Register.Absolute = true;
843 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
844 tctx->emit_instruction(tctx, &new_inst);
845 }
846
847 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
848 /* MOV dst.xz, tmpA.yx */
849 new_inst = tgsi_default_full_instruction();
850 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
851 new_inst.Instruction.NumDstRegs = 1;
852 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
853 new_inst.Instruction.NumSrcRegs = 1;
854 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
855 tctx->emit_instruction(tctx, &new_inst);
856 }
857
858 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
859 /* MOV dst.w, imm{1.0} */
860 new_inst = tgsi_default_full_instruction();
861 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
862 new_inst.Instruction.NumDstRegs = 1;
863 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
864 new_inst.Instruction.NumSrcRegs = 1;
865 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
866 tctx->emit_instruction(tctx, &new_inst);
867 }
868 }
869
870 /* DP4 - 4-component Dot Product
871 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
872 *
873 * DP3 - 3-component Dot Product
874 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
875 *
876 * DPH - Homogeneous Dot Product
877 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
878 *
879 * DP2 - 2-component Dot Product
880 * dst = src0.x \times src1.x + src0.y \times src1.y
881 *
882 * DP2A - 2-component Dot Product And Add
883 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
884 *
885 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
886 * operations, which is what you'd prefer for a ISA that is natively
887 * scalar. Probably a native vector ISA would at least already have
888 * DP4/DP3 instructions, but perhaps there is room for an alternative
889 * translation for DPH/DP2/DP2A using vector instructions.
890 *
891 * ; needs: 1 tmp
892 * MUL tmpA.x, src0.x, src1.x
893 * MAD tmpA.x, src0.y, src1.y, tmpA.x
894 * if (DPH || DP3 || DP4) {
895 * MAD tmpA.x, src0.z, src1.z, tmpA.x
896 * if (DPH) {
897 * ADD tmpA.x, src1.w, tmpA.x
898 * } else if (DP4) {
899 * MAD tmpA.x, src0.w, src1.w, tmpA.x
900 * }
901 * } else if (DP2A) {
902 * ADD tmpA.x, src2.x, tmpA.x
903 * }
904 * ; fixup last instruction to replicate into dst
905 */
906 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
907 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
908 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
909 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
910 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
911 #define DOTP_TMP 1
912 static void
913 transform_dotp(struct tgsi_transform_context *tctx,
914 struct tgsi_full_instruction *inst)
915 {
916 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
917 struct tgsi_full_dst_register *dst = &inst->Dst[0];
918 struct tgsi_full_src_register *src0 = &inst->Src[0];
919 struct tgsi_full_src_register *src1 = &inst->Src[1];
920 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
921 struct tgsi_full_instruction new_inst;
922 unsigned opcode = inst->Instruction.Opcode;
923
924 /* NOTE: any potential last instruction must replicate src on all
925 * components (since it could be re-written to write to final dst)
926 */
927
928 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
929 /* MUL tmpA.x, src0.x, src1.x */
930 new_inst = tgsi_default_full_instruction();
931 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
932 new_inst.Instruction.NumDstRegs = 1;
933 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
934 new_inst.Instruction.NumSrcRegs = 2;
935 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
936 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
937 tctx->emit_instruction(tctx, &new_inst);
938
939 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
940 new_inst = tgsi_default_full_instruction();
941 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
942 new_inst.Instruction.NumDstRegs = 1;
943 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
944 new_inst.Instruction.NumSrcRegs = 3;
945 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
946 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
947 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
948
949 if ((opcode == TGSI_OPCODE_DPH) ||
950 (opcode == TGSI_OPCODE_DP3) ||
951 (opcode == TGSI_OPCODE_DP4)) {
952 tctx->emit_instruction(tctx, &new_inst);
953
954 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
955 new_inst = tgsi_default_full_instruction();
956 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
957 new_inst.Instruction.NumDstRegs = 1;
958 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
959 new_inst.Instruction.NumSrcRegs = 3;
960 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
961 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
962 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
963
964 if (opcode == TGSI_OPCODE_DPH) {
965 tctx->emit_instruction(tctx, &new_inst);
966
967 /* ADD tmpA.x, src1.w, tmpA.x */
968 new_inst = tgsi_default_full_instruction();
969 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
970 new_inst.Instruction.NumDstRegs = 1;
971 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
972 new_inst.Instruction.NumSrcRegs = 2;
973 reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
974 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
975 } else if (opcode == TGSI_OPCODE_DP4) {
976 tctx->emit_instruction(tctx, &new_inst);
977
978 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
979 new_inst = tgsi_default_full_instruction();
980 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
981 new_inst.Instruction.NumDstRegs = 1;
982 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
983 new_inst.Instruction.NumSrcRegs = 3;
984 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
985 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
986 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
987 }
988 } else if (opcode == TGSI_OPCODE_DP2A) {
989 tctx->emit_instruction(tctx, &new_inst);
990
991 /* ADD tmpA.x, src2.x, tmpA.x */
992 new_inst = tgsi_default_full_instruction();
993 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
994 new_inst.Instruction.NumDstRegs = 1;
995 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
996 new_inst.Instruction.NumSrcRegs = 2;
997 reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
998 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
999 }
1000
1001 /* fixup last instruction to write to dst: */
1002 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1003
1004 tctx->emit_instruction(tctx, &new_inst);
1005 }
1006 }
1007
1008 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1009 * in the case of TXP, the clamping must happen *after* projection, so
1010 * we need to lower TXP to TEX.
1011 *
1012 * MOV tmpA, src0
1013 * if (opc == TXP) {
1014 * ; do perspective division manually before clamping:
1015 * RCP tmpB, tmpA.w
1016 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1017 * opc = TEX;
1018 * }
1019 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1020 * <opc> dst, tmpA, ...
1021 */
1022 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1023 #define SAMP_TMP 2
1024 static int
1025 transform_samp(struct tgsi_transform_context *tctx,
1026 struct tgsi_full_instruction *inst)
1027 {
1028 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1029 struct tgsi_full_src_register *coord = &inst->Src[0];
1030 struct tgsi_full_src_register *samp;
1031 struct tgsi_full_instruction new_inst;
1032 /* mask is clamped coords, pmask is all coords (for projection): */
1033 unsigned mask = 0, pmask = 0, smask;
1034 unsigned tex = inst->Texture.Texture;
1035 unsigned opcode = inst->Instruction.Opcode;
1036 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1037 (ctx->config->lower_TXP & (1 << tex));
1038
1039 if (opcode == TGSI_OPCODE_TXB2) {
1040 samp = &inst->Src[2];
1041 } else {
1042 samp = &inst->Src[1];
1043 }
1044
1045 /* convert sampler # to bitmask to test: */
1046 smask = 1 << samp->Register.Index;
1047
1048 /* check if we actually need to lower this one: */
1049 if (!(ctx->saturate & smask) && !lower_txp)
1050 return -1;
1051
1052 /* figure out which coordinates need saturating:
1053 * - RECT textures should not get saturated
1054 * - array index coords should not get saturated
1055 */
1056 switch (tex) {
1057 case TGSI_TEXTURE_3D:
1058 case TGSI_TEXTURE_CUBE:
1059 case TGSI_TEXTURE_CUBE_ARRAY:
1060 case TGSI_TEXTURE_SHADOWCUBE:
1061 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1062 if (ctx->config->saturate_r & smask)
1063 mask |= TGSI_WRITEMASK_Z;
1064 pmask |= TGSI_WRITEMASK_Z;
1065 /* fallthrough */
1066
1067 case TGSI_TEXTURE_2D:
1068 case TGSI_TEXTURE_2D_ARRAY:
1069 case TGSI_TEXTURE_SHADOW2D:
1070 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1071 case TGSI_TEXTURE_2D_MSAA:
1072 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1073 if (ctx->config->saturate_t & smask)
1074 mask |= TGSI_WRITEMASK_Y;
1075 pmask |= TGSI_WRITEMASK_Y;
1076 /* fallthrough */
1077
1078 case TGSI_TEXTURE_1D:
1079 case TGSI_TEXTURE_1D_ARRAY:
1080 case TGSI_TEXTURE_SHADOW1D:
1081 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1082 if (ctx->config->saturate_s & smask)
1083 mask |= TGSI_WRITEMASK_X;
1084 pmask |= TGSI_WRITEMASK_X;
1085 break;
1086
1087 case TGSI_TEXTURE_RECT:
1088 case TGSI_TEXTURE_SHADOWRECT:
1089 /* we don't saturate, but in case of lower_txp we
1090 * still need to do the perspective divide:
1091 */
1092 pmask = TGSI_WRITEMASK_XY;
1093 break;
1094 }
1095
1096 /* sanity check.. driver could be asking to saturate a non-
1097 * existent coordinate component:
1098 */
1099 if (!mask && !lower_txp)
1100 return -1;
1101
1102 /* MOV tmpA, src0 */
1103 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1104
1105 /* This is a bit sad.. we need to clamp *after* the coords
1106 * are projected, which means lowering TXP to TEX and doing
1107 * the projection ourself. But since I haven't figured out
1108 * how to make the lowering code deliver an electric shock
1109 * to anyone using GL_CLAMP, we must do this instead:
1110 */
1111 if (opcode == TGSI_OPCODE_TXP) {
1112 /* RCP tmpB.x tmpA.w */
1113 new_inst = tgsi_default_full_instruction();
1114 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1115 new_inst.Instruction.NumDstRegs = 1;
1116 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1117 new_inst.Instruction.NumSrcRegs = 1;
1118 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1119 tctx->emit_instruction(tctx, &new_inst);
1120
1121 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1122 new_inst = tgsi_default_full_instruction();
1123 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1124 new_inst.Instruction.NumDstRegs = 1;
1125 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1126 new_inst.Instruction.NumSrcRegs = 2;
1127 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1128 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1129 tctx->emit_instruction(tctx, &new_inst);
1130
1131 opcode = TGSI_OPCODE_TEX;
1132 }
1133
1134 /* MOV_SAT tmpA.<mask>, tmpA */
1135 if (mask) {
1136 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
1137 TGSI_SAT_ZERO_ONE);
1138 }
1139
1140 /* modify the texture samp instruction to take fixed up coord: */
1141 new_inst = *inst;
1142 new_inst.Instruction.Opcode = opcode;
1143 new_inst.Src[0] = ctx->tmp[A].src;
1144 tctx->emit_instruction(tctx, &new_inst);
1145
1146 return 0;
1147 }
1148
/* Two-sided color emulation:
 *   For each COLOR input, create a corresponding BCOLOR input, plus
 *   CMP instruction to select front or back color based on FACE
 *
 * TWOSIDE_GROW(n) is the worst-case number of extra tokens needed for
 * n emulated colors.  A declaration that carries a semantic takes three
 * tokens (declaration + range + semantic), which applies to the FACE
 * input and to every BCOLOR input.
 */
#define TWOSIDE_GROW(n)  (                                              \
      3 +                /* FACE: decl + range + semantic */            \
      ((n) * 3) +        /* IN[] BCOLOR[n]: decl + range + semantic */  \
      ((n) * 1) +        /* TEMP[] */                                   \
      ((n) * NINST(3))   /* CMP instr */                                \
      )
1159
1160 static void
1161 emit_twoside(struct tgsi_transform_context *tctx)
1162 {
1163 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1164 struct tgsi_shader_info *info = ctx->info;
1165 struct tgsi_full_declaration decl;
1166 struct tgsi_full_instruction new_inst;
1167 unsigned inbase, tmpbase;
1168 int i;
1169
1170 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1171 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1172
1173 /* additional inputs for BCOLOR's */
1174 for (i = 0; i < ctx->two_side_colors; i++) {
1175 decl = tgsi_default_full_declaration();
1176 decl.Declaration.File = TGSI_FILE_INPUT;
1177 decl.Declaration.Semantic = true;
1178 decl.Range.First = decl.Range.Last = inbase + i;
1179 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1180 decl.Semantic.Index =
1181 info->input_semantic_index[ctx->two_side_idx[i]];
1182 tctx->emit_declaration(tctx, &decl);
1183 }
1184
1185 /* additional input for FACE */
1186 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1187 decl = tgsi_default_full_declaration();
1188 decl.Declaration.File = TGSI_FILE_INPUT;
1189 decl.Declaration.Semantic = true;
1190 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1191 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1192 decl.Semantic.Index = 0;
1193 tctx->emit_declaration(tctx, &decl);
1194
1195 ctx->face_idx = decl.Range.First;
1196 }
1197
1198 /* additional temps for COLOR/BCOLOR selection: */
1199 for (i = 0; i < ctx->two_side_colors; i++) {
1200 decl = tgsi_default_full_declaration();
1201 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1202 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1203 tctx->emit_declaration(tctx, &decl);
1204 }
1205
1206 /* and finally additional instructions to select COLOR/BCOLOR: */
1207 for (i = 0; i < ctx->two_side_colors; i++) {
1208 new_inst = tgsi_default_full_instruction();
1209 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1210
1211 new_inst.Instruction.NumDstRegs = 1;
1212 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1213 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1214 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1215
1216 new_inst.Instruction.NumSrcRegs = 3;
1217 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1218 new_inst.Src[0].Register.Index = ctx->face_idx;
1219 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1220 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1221 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1222 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1223 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1224 new_inst.Src[1].Register.Index = inbase + i;
1225 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1226 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1227 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1228 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1229 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1230 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1231 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1232 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1233 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1234 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1235
1236 tctx->emit_instruction(tctx, &new_inst);
1237 }
1238 }
1239
1240 static void
1241 emit_decls(struct tgsi_transform_context *tctx)
1242 {
1243 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1244 struct tgsi_shader_info *info = ctx->info;
1245 struct tgsi_full_declaration decl;
1246 struct tgsi_full_immediate immed;
1247 unsigned tmpbase;
1248 int i;
1249
1250 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1251
1252 ctx->color_base = tmpbase + ctx->numtmp;
1253
1254 /* declare immediate: */
1255 immed = tgsi_default_full_immediate();
1256 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1257 immed.u[0].Float = 0.0;
1258 immed.u[1].Float = 1.0;
1259 immed.u[2].Float = 128.0;
1260 immed.u[3].Float = 0.0;
1261 tctx->emit_immediate(tctx, &immed);
1262
1263 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1264 ctx->imm.Register.Index = info->immediate_count;
1265 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1266 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1267 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1268 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1269
1270 /* declare temp regs: */
1271 for (i = 0; i < ctx->numtmp; i++) {
1272 decl = tgsi_default_full_declaration();
1273 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1274 decl.Range.First = decl.Range.Last = tmpbase + i;
1275 tctx->emit_declaration(tctx, &decl);
1276
1277 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1278 ctx->tmp[i].src.Register.Index = tmpbase + i;
1279 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1280 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1281 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1282 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1283
1284 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1285 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1286 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1287 }
1288
1289 if (ctx->two_side_colors)
1290 emit_twoside(tctx);
1291 }
1292
1293 static void
1294 rename_color_inputs(struct tgsi_lowering_context *ctx,
1295 struct tgsi_full_instruction *inst)
1296 {
1297 unsigned i, j;
1298 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1299 struct tgsi_src_register *src = &inst->Src[i].Register;
1300 if (src->File == TGSI_FILE_INPUT) {
1301 for (j = 0; j < ctx->two_side_colors; j++) {
1302 if (src->Index == ctx->two_side_idx[j]) {
1303 src->File = TGSI_FILE_TEMPORARY;
1304 src->Index = ctx->color_base + j;
1305 break;
1306 }
1307 }
1308 }
1309 }
1310
1311 }
1312
1313 static void
1314 transform_instr(struct tgsi_transform_context *tctx,
1315 struct tgsi_full_instruction *inst)
1316 {
1317 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1318
1319 if (!ctx->emitted_decls) {
1320 emit_decls(tctx);
1321 ctx->emitted_decls = 1;
1322 }
1323
1324 /* if emulating two-sided-color, we need to re-write some
1325 * src registers:
1326 */
1327 if (ctx->two_side_colors)
1328 rename_color_inputs(ctx, inst);
1329
1330 switch (inst->Instruction.Opcode) {
1331 case TGSI_OPCODE_DST:
1332 if (!ctx->config->lower_DST)
1333 goto skip;
1334 transform_dst(tctx, inst);
1335 break;
1336 case TGSI_OPCODE_XPD:
1337 if (!ctx->config->lower_XPD)
1338 goto skip;
1339 transform_xpd(tctx, inst);
1340 break;
1341 case TGSI_OPCODE_SCS:
1342 if (!ctx->config->lower_SCS)
1343 goto skip;
1344 transform_scs(tctx, inst);
1345 break;
1346 case TGSI_OPCODE_LRP:
1347 if (!ctx->config->lower_LRP)
1348 goto skip;
1349 transform_lrp(tctx, inst);
1350 break;
1351 case TGSI_OPCODE_FRC:
1352 if (!ctx->config->lower_FRC)
1353 goto skip;
1354 transform_frc(tctx, inst);
1355 break;
1356 case TGSI_OPCODE_POW:
1357 if (!ctx->config->lower_POW)
1358 goto skip;
1359 transform_pow(tctx, inst);
1360 break;
1361 case TGSI_OPCODE_LIT:
1362 if (!ctx->config->lower_LIT)
1363 goto skip;
1364 transform_lit(tctx, inst);
1365 break;
1366 case TGSI_OPCODE_EXP:
1367 if (!ctx->config->lower_EXP)
1368 goto skip;
1369 transform_exp(tctx, inst);
1370 break;
1371 case TGSI_OPCODE_LOG:
1372 if (!ctx->config->lower_LOG)
1373 goto skip;
1374 transform_log(tctx, inst);
1375 break;
1376 case TGSI_OPCODE_DP4:
1377 if (!ctx->config->lower_DP4)
1378 goto skip;
1379 transform_dotp(tctx, inst);
1380 break;
1381 case TGSI_OPCODE_DP3:
1382 if (!ctx->config->lower_DP3)
1383 goto skip;
1384 transform_dotp(tctx, inst);
1385 break;
1386 case TGSI_OPCODE_DPH:
1387 if (!ctx->config->lower_DPH)
1388 goto skip;
1389 transform_dotp(tctx, inst);
1390 break;
1391 case TGSI_OPCODE_DP2:
1392 if (!ctx->config->lower_DP2)
1393 goto skip;
1394 transform_dotp(tctx, inst);
1395 break;
1396 case TGSI_OPCODE_DP2A:
1397 if (!ctx->config->lower_DP2A)
1398 goto skip;
1399 transform_dotp(tctx, inst);
1400 break;
1401 case TGSI_OPCODE_TEX:
1402 case TGSI_OPCODE_TXP:
1403 case TGSI_OPCODE_TXB:
1404 case TGSI_OPCODE_TXB2:
1405 case TGSI_OPCODE_TXL:
1406 if (transform_samp(tctx, inst))
1407 goto skip;
1408 break;
1409 default:
1410 skip:
1411 tctx->emit_instruction(tctx, inst);
1412 break;
1413 }
1414 }
1415
1416 /* returns NULL if no lowering required, else returns the new
1417 * tokens (which caller is required to free()). In either case
1418 * returns the current info.
1419 */
1420 const struct tgsi_token *
1421 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1422 const struct tgsi_token *tokens,
1423 struct tgsi_shader_info *info)
1424 {
1425 struct tgsi_lowering_context ctx;
1426 struct tgsi_token *newtoks;
1427 int newlen, numtmp;
1428
1429 /* sanity check in case limit is ever increased: */
1430 assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1431
1432 memset(&ctx, 0, sizeof(ctx));
1433 ctx.base.transform_instruction = transform_instr;
1434 ctx.info = info;
1435 ctx.config = config;
1436
1437 tgsi_scan_shader(tokens, info);
1438
1439 /* if we are adding fragment shader support to emulate two-sided
1440 * color, then figure out the number of additional inputs we need
1441 * to create for BCOLOR's..
1442 */
1443 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1444 config->color_two_side) {
1445 int i;
1446 ctx.face_idx = -1;
1447 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1448 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1449 ctx.two_side_idx[ctx.two_side_colors++] = i;
1450 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1451 ctx.face_idx = i;
1452 }
1453 }
1454
1455 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1456
1457 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1458 /* if there are no instructions to lower, then we are done: */
1459 if (!(OPCS(DST) ||
1460 OPCS(XPD) ||
1461 OPCS(SCS) ||
1462 OPCS(LRP) ||
1463 OPCS(FRC) ||
1464 OPCS(POW) ||
1465 OPCS(LIT) ||
1466 OPCS(EXP) ||
1467 OPCS(LOG) ||
1468 OPCS(DP4) ||
1469 OPCS(DP3) ||
1470 OPCS(DPH) ||
1471 OPCS(DP2) ||
1472 OPCS(DP2A) ||
1473 OPCS(TXP) ||
1474 ctx.two_side_colors ||
1475 ctx.saturate))
1476 return NULL;
1477
1478 #if 0 /* debug */
1479 _debug_printf("BEFORE:");
1480 tgsi_dump(tokens, 0);
1481 #endif
1482
1483 numtmp = 0;
1484 newlen = tgsi_num_tokens(tokens);
1485 if (OPCS(DST)) {
1486 newlen += DST_GROW * OPCS(DST);
1487 numtmp = MAX2(numtmp, DST_TMP);
1488 }
1489 if (OPCS(XPD)) {
1490 newlen += XPD_GROW * OPCS(XPD);
1491 numtmp = MAX2(numtmp, XPD_TMP);
1492 }
1493 if (OPCS(SCS)) {
1494 newlen += SCS_GROW * OPCS(SCS);
1495 numtmp = MAX2(numtmp, SCS_TMP);
1496 }
1497 if (OPCS(LRP)) {
1498 newlen += LRP_GROW * OPCS(LRP);
1499 numtmp = MAX2(numtmp, LRP_TMP);
1500 }
1501 if (OPCS(FRC)) {
1502 newlen += FRC_GROW * OPCS(FRC);
1503 numtmp = MAX2(numtmp, FRC_TMP);
1504 }
1505 if (OPCS(POW)) {
1506 newlen += POW_GROW * OPCS(POW);
1507 numtmp = MAX2(numtmp, POW_TMP);
1508 }
1509 if (OPCS(LIT)) {
1510 newlen += LIT_GROW * OPCS(LIT);
1511 numtmp = MAX2(numtmp, LIT_TMP);
1512 }
1513 if (OPCS(EXP)) {
1514 newlen += EXP_GROW * OPCS(EXP);
1515 numtmp = MAX2(numtmp, EXP_TMP);
1516 }
1517 if (OPCS(LOG)) {
1518 newlen += LOG_GROW * OPCS(LOG);
1519 numtmp = MAX2(numtmp, LOG_TMP);
1520 }
1521 if (OPCS(DP4)) {
1522 newlen += DP4_GROW * OPCS(DP4);
1523 numtmp = MAX2(numtmp, DOTP_TMP);
1524 }
1525 if (OPCS(DP3)) {
1526 newlen += DP3_GROW * OPCS(DP3);
1527 numtmp = MAX2(numtmp, DOTP_TMP);
1528 }
1529 if (OPCS(DPH)) {
1530 newlen += DPH_GROW * OPCS(DPH);
1531 numtmp = MAX2(numtmp, DOTP_TMP);
1532 }
1533 if (OPCS(DP2)) {
1534 newlen += DP2_GROW * OPCS(DP2);
1535 numtmp = MAX2(numtmp, DOTP_TMP);
1536 }
1537 if (OPCS(DP2A)) {
1538 newlen += DP2A_GROW * OPCS(DP2A);
1539 numtmp = MAX2(numtmp, DOTP_TMP);
1540 }
1541 if (ctx.saturate || config->lower_TXP) {
1542 int n = 0;
1543
1544 if (ctx.saturate) {
1545 n = info->opcode_count[TGSI_OPCODE_TEX] +
1546 info->opcode_count[TGSI_OPCODE_TXP] +
1547 info->opcode_count[TGSI_OPCODE_TXB] +
1548 info->opcode_count[TGSI_OPCODE_TXB2] +
1549 info->opcode_count[TGSI_OPCODE_TXL];
1550 } else if (config->lower_TXP) {
1551 n = info->opcode_count[TGSI_OPCODE_TXP];
1552 }
1553
1554 newlen += SAMP_GROW * n;
1555 numtmp = MAX2(numtmp, SAMP_TMP);
1556 }
1557
1558 /* specifically don't include two_side_colors temps in the count: */
1559 ctx.numtmp = numtmp;
1560
1561 if (ctx.two_side_colors) {
1562 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1563 /* note: we permanently consume temp regs, re-writing references
1564 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1565 * instruction that selects which varying to use):
1566 */
1567 numtmp += ctx.two_side_colors;
1568 }
1569
1570 newlen += 2 * numtmp;
1571 newlen += 5; /* immediate */
1572
1573 newtoks = tgsi_alloc_tokens(newlen);
1574 if (!newtoks)
1575 return NULL;
1576
1577 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1578
1579 tgsi_scan_shader(newtoks, info);
1580
1581 #if 0 /* debug */
1582 _debug_printf("AFTER:");
1583 tgsi_dump(newtoks, 0);
1584 #endif
1585
1586 return newtoks;
1587 }