freedreno: add texcoord clamp support to lowering
[mesa.git] / src / gallium / drivers / freedreno / freedreno_lowering.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
32
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
35
36 #include "freedreno_lowering.h"
37
38 struct fd_lowering_context {
39 struct tgsi_transform_context base;
40 const struct fd_lowering_config *config;
41 struct tgsi_shader_info *info;
42 unsigned two_side_colors;
43 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
44 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
45 int face_idx;
46 unsigned numtmp;
47 struct {
48 struct tgsi_full_src_register src;
49 struct tgsi_full_dst_register dst;
50 } tmp[2];
51 #define A 0
52 #define B 1
53 struct tgsi_full_src_register imm;
54 int emitted_decls;
55 unsigned saturate;
56 };
57
/*
 * Recover the lowering context from the generic transform context that
 * is embedded in it.  This is a plain pointer cast; nothing is checked.
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	struct fd_lowering_context *lowering =
			(struct fd_lowering_context *)tctx;

	return lowering;
}
63
64 /*
65 * Utility helpers:
66 */
67
68 static void
69 reg_dst(struct tgsi_full_dst_register *dst,
70 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
71 {
72 *dst = *orig_dst;
73 dst->Register.WriteMask &= wrmask;
74 assert(dst->Register.WriteMask);
75 }
76
77 static inline void
78 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
79 {
80 swiz[0] = src->SwizzleX;
81 swiz[1] = src->SwizzleY;
82 swiz[2] = src->SwizzleZ;
83 swiz[3] = src->SwizzleW;
84 }
85
86 static void
87 reg_src(struct tgsi_full_src_register *src,
88 const struct tgsi_full_src_register *orig_src,
89 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
90 {
91 unsigned swiz[4];
92 get_swiz(swiz, &orig_src->Register);
93 *src = *orig_src;
94 src->Register.SwizzleX = swiz[sx];
95 src->Register.SwizzleY = swiz[sy];
96 src->Register.SwizzleZ = swiz[sz];
97 src->Register.SwizzleW = swiz[sw];
98 }
99
/*
 * Shorthand for the four swizzle arguments of reg_src(): SWIZ(x,y,z,w)
 * expands to four comma-separated TGSI_SWIZZLE_* selectors.  An
 * underscore marks a channel whose value is irrelevant to the
 * instruction being built; it is mapped to X.
 */
#define TGSI_SWIZZLE__      TGSI_SWIZZLE_X    /* don't-care value! */
#define SWIZ(x,y,z,w)       \
	TGSI_SWIZZLE_ ## x, \
	TGSI_SWIZZLE_ ## y, \
	TGSI_SWIZZLE_ ## z, \
	TGSI_SWIZZLE_ ## w
103
104 /*
105 * if (dst.x aliases src.x) {
106 * MOV tmpA.x, src.x
107 * src = tmpA
108 * }
109 * COS dst.x, src.x
110 * SIN dst.y, src.x
111 * MOV dst.zw, imm{0.0, 1.0}
112 */
113 static bool
114 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
115 const struct tgsi_full_src_register *src, unsigned src_mask)
116 {
117 if ((dst->Register.File == src->Register.File) &&
118 (dst->Register.Index == src->Register.Index)) {
119 unsigned i, actual_mask = 0;
120 unsigned swiz[4];
121 get_swiz(swiz, &src->Register);
122 for (i = 0; i < 4; i++)
123 if (src_mask & (1 << i))
124 actual_mask |= (1 << swiz[i]);
125 if (actual_mask & dst_mask)
126 return true;
127 }
128 return false;
129 }
130
131 static void
132 create_mov(struct tgsi_transform_context *tctx,
133 const struct tgsi_full_dst_register *dst,
134 const struct tgsi_full_src_register *src,
135 unsigned mask, unsigned saturate)
136 {
137 struct tgsi_full_instruction new_inst;
138
139 new_inst = tgsi_default_full_instruction();
140 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
141 new_inst.Instruction.Saturate = saturate;
142 new_inst.Instruction.NumDstRegs = 1;
143 reg_dst(&new_inst.Dst[0], dst, mask);
144 new_inst.Instruction.NumSrcRegs = 1;
145 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
146 tctx->emit_instruction(tctx, &new_inst);
147 }
148
149 /*
150 * Lowering Translators:
151 */
152
153 /* DST - Distance Vector
154 * dst.x = 1.0
155 * dst.y = src0.y \times src1.y
156 * dst.z = src0.z
157 * dst.w = src1.w
158 *
159 * ; note: could be more clever and use just a single temp
160 * ; if I was clever enough to re-write the swizzles.
161 * ; needs: 2 tmp, imm{1.0}
162 * if (dst.y aliases src0.z) {
163 * MOV tmpA.yz, src0.yz
164 * src0 = tmpA
165 * }
166 * if (dst.yz aliases src1.w) {
167 * MOV tmpB.yw, src1.yw
168 * src1 = tmpB
169 * }
170 * MUL dst.y, src0.y, src1.y
171 * MOV dst.z, src0.z
172 * MOV dst.w, src1.w
173 * MOV dst.x, imm{1.0}
174 */
175 #define DST_GROW (19 - 4)
176 #define DST_TMP 2
177 static void
178 transform_dst(struct tgsi_transform_context *tctx,
179 struct tgsi_full_instruction *inst)
180 {
181 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
182 struct tgsi_full_dst_register *dst = &inst->Dst[0];
183 struct tgsi_full_src_register *src0 = &inst->Src[0];
184 struct tgsi_full_src_register *src1 = &inst->Src[1];
185 struct tgsi_full_instruction new_inst;
186
187 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
188 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
189 src0 = &ctx->tmp[A].src;
190 }
191
192 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
193 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
194 src1 = &ctx->tmp[B].src;
195 }
196
197 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
198 /* MUL dst.y, src0.y, src1.y */
199 new_inst = tgsi_default_full_instruction();
200 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
201 new_inst.Instruction.NumDstRegs = 1;
202 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
203 new_inst.Instruction.NumSrcRegs = 2;
204 reg_src(&new_inst.Src[0], src0, SWIZ(_,Y,_,_));
205 reg_src(&new_inst.Src[1], src1, SWIZ(_,Y,_,_));
206 tctx->emit_instruction(tctx, &new_inst);
207 }
208
209 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
210 /* MOV dst.z, src0.z */
211 new_inst = tgsi_default_full_instruction();
212 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
213 new_inst.Instruction.NumDstRegs = 1;
214 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
215 new_inst.Instruction.NumSrcRegs = 1;
216 reg_src(&new_inst.Src[0], src0, SWIZ(_,_,Z,_));
217 tctx->emit_instruction(tctx, &new_inst);
218 }
219
220 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
221 /* MOV dst.w, src1.w */
222 new_inst = tgsi_default_full_instruction();
223 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
224 new_inst.Instruction.NumDstRegs = 1;
225 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
226 new_inst.Instruction.NumSrcRegs = 1;
227 reg_src(&new_inst.Src[0], src1, SWIZ(_,_,_,W));
228 tctx->emit_instruction(tctx, &new_inst);
229 }
230
231 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
232 /* MOV dst.x, imm{1.0} */
233 new_inst = tgsi_default_full_instruction();
234 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
235 new_inst.Instruction.NumDstRegs = 1;
236 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
237 new_inst.Instruction.NumSrcRegs = 1;
238 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,_));
239 tctx->emit_instruction(tctx, &new_inst);
240 }
241 }
242
243 /* XPD - Cross Product
244 * dst.x = src0.y \times src1.z - src1.y \times src0.z
245 * dst.y = src0.z \times src1.x - src1.z \times src0.x
246 * dst.z = src0.x \times src1.y - src1.x \times src0.y
247 * dst.w = 1.0
248 *
249 * ; needs: 2 tmp, imm{1.0}
250 * MUL tmpA.xyz, src0.yzx, src1.zxy
251 * MUL tmpB.xyz, src1.yzx, src0.zxy
252 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
253 * MOV dst.w, imm{1.0}
254 */
255 #define XPD_GROW (15 - 4)
256 #define XPD_TMP 2
257 static void
258 transform_xpd(struct tgsi_transform_context *tctx,
259 struct tgsi_full_instruction *inst)
260 {
261 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
262 struct tgsi_full_dst_register *dst = &inst->Dst[0];
263 struct tgsi_full_src_register *src0 = &inst->Src[0];
264 struct tgsi_full_src_register *src1 = &inst->Src[1];
265 struct tgsi_full_instruction new_inst;
266
267 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
268 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
269 new_inst = tgsi_default_full_instruction();
270 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
271 new_inst.Instruction.NumDstRegs = 1;
272 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
273 new_inst.Instruction.NumSrcRegs = 2;
274 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Z,X,_));
275 reg_src(&new_inst.Src[1], src1, SWIZ(Z,X,Y,_));
276 tctx->emit_instruction(tctx, &new_inst);
277
278 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
279 new_inst = tgsi_default_full_instruction();
280 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
281 new_inst.Instruction.NumDstRegs = 1;
282 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
283 new_inst.Instruction.NumSrcRegs = 2;
284 reg_src(&new_inst.Src[0], src1, SWIZ(Y,Z,X,_));
285 reg_src(&new_inst.Src[1], src0, SWIZ(Z,X,Y,_));
286 tctx->emit_instruction(tctx, &new_inst);
287
288 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
289 new_inst = tgsi_default_full_instruction();
290 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
291 new_inst.Instruction.NumDstRegs = 1;
292 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
293 new_inst.Instruction.NumSrcRegs = 2;
294 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,_));
295 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,_));
296 tctx->emit_instruction(tctx, &new_inst);
297 }
298
299 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
300 /* MOV dst.w, imm{1.0} */
301 new_inst = tgsi_default_full_instruction();
302 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
303 new_inst.Instruction.NumDstRegs = 1;
304 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
305 new_inst.Instruction.NumSrcRegs = 1;
306 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
307 tctx->emit_instruction(tctx, &new_inst);
308 }
309 }
310
311 /* SCS - Sine Cosine
312 * dst.x = \cos{src.x}
313 * dst.y = \sin{src.x}
314 * dst.z = 0.0
315 * dst.w = 1.0
316 *
317 * ; needs: 1 tmp, imm{0.0, 1.0}
318 * if (dst.x aliases src.x) {
319 * MOV tmpA.x, src.x
320 * src = tmpA
321 * }
322 * COS dst.x, src.x
323 * SIN dst.y, src.x
324 * MOV dst.zw, imm{0.0, 1.0}
325 */
326 #define SCS_GROW (12 - 3)
327 #define SCS_TMP 1
328 static void
329 transform_scs(struct tgsi_transform_context *tctx,
330 struct tgsi_full_instruction *inst)
331 {
332 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
333 struct tgsi_full_dst_register *dst = &inst->Dst[0];
334 struct tgsi_full_src_register *src = &inst->Src[0];
335 struct tgsi_full_instruction new_inst;
336
337 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
338 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
339 src = &ctx->tmp[A].src;
340 }
341
342 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
343 /* COS dst.x, src.x */
344 new_inst = tgsi_default_full_instruction();
345 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
346 new_inst.Instruction.NumDstRegs = 1;
347 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
348 new_inst.Instruction.NumSrcRegs = 1;
349 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
350 tctx->emit_instruction(tctx, &new_inst);
351 }
352
353 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
354 /* SIN dst.y, src.x */
355 new_inst = tgsi_default_full_instruction();
356 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
357 new_inst.Instruction.NumDstRegs = 1;
358 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
359 new_inst.Instruction.NumSrcRegs = 1;
360 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
361 tctx->emit_instruction(tctx, &new_inst);
362 }
363
364 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
365 /* MOV dst.zw, imm{0.0, 1.0} */
366 new_inst = tgsi_default_full_instruction();
367 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
368 new_inst.Instruction.NumDstRegs = 1;
369 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
370 new_inst.Instruction.NumSrcRegs = 1;
371 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,X,Y));
372 tctx->emit_instruction(tctx, &new_inst);
373 }
374 }
375
376 /* LRP - Linear Interpolate
377 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
378 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
379 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
380 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
381 *
382 * ; needs: 2 tmp, imm{1.0}
383 * MUL tmpA, src0, src1
384 * SUB tmpB, imm{1.0}, src0
385 * MUL tmpB, tmpB, src2
386 * ADD dst, tmpA, tmpB
387 */
388 #define LRP_GROW (16 - 4)
389 #define LRP_TMP 2
390 static void
391 transform_lrp(struct tgsi_transform_context *tctx,
392 struct tgsi_full_instruction *inst)
393 {
394 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
395 struct tgsi_full_dst_register *dst = &inst->Dst[0];
396 struct tgsi_full_src_register *src0 = &inst->Src[0];
397 struct tgsi_full_src_register *src1 = &inst->Src[1];
398 struct tgsi_full_src_register *src2 = &inst->Src[2];
399 struct tgsi_full_instruction new_inst;
400
401 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
402 /* MUL tmpA, src0, src1 */
403 new_inst = tgsi_default_full_instruction();
404 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
405 new_inst.Instruction.NumDstRegs = 1;
406 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
407 new_inst.Instruction.NumSrcRegs = 2;
408 reg_src(&new_inst.Src[0], src0, SWIZ(X,Y,Z,W));
409 reg_src(&new_inst.Src[1], src1, SWIZ(X,Y,Z,W));
410 tctx->emit_instruction(tctx, &new_inst);
411
412 /* SUB tmpB, imm{1.0}, src0 */
413 new_inst = tgsi_default_full_instruction();
414 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
415 new_inst.Instruction.NumDstRegs = 1;
416 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
417 new_inst.Instruction.NumSrcRegs = 2;
418 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,Y,Y,Y));
419 reg_src(&new_inst.Src[1], src0, SWIZ(X,Y,Z,W));
420 tctx->emit_instruction(tctx, &new_inst);
421
422 /* MUL tmpB, tmpB, src2 */
423 new_inst = tgsi_default_full_instruction();
424 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
425 new_inst.Instruction.NumDstRegs = 1;
426 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
427 new_inst.Instruction.NumSrcRegs = 2;
428 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
429 reg_src(&new_inst.Src[1], src2, SWIZ(X,Y,Z,W));
430 tctx->emit_instruction(tctx, &new_inst);
431
432 /* ADD dst, tmpA, tmpB */
433 new_inst = tgsi_default_full_instruction();
434 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
435 new_inst.Instruction.NumDstRegs = 1;
436 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
437 new_inst.Instruction.NumSrcRegs = 2;
438 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
439 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
440 tctx->emit_instruction(tctx, &new_inst);
441 }
442 }
443
444 /* FRC - Fraction
445 * dst.x = src.x - \lfloor src.x\rfloor
446 * dst.y = src.y - \lfloor src.y\rfloor
447 * dst.z = src.z - \lfloor src.z\rfloor
448 * dst.w = src.w - \lfloor src.w\rfloor
449 *
450 * ; needs: 1 tmp
451 * FLR tmpA, src
452 * SUB dst, src, tmpA
453 */
454 #define FRC_GROW (7 - 3)
455 #define FRC_TMP 1
456 static void
457 transform_frc(struct tgsi_transform_context *tctx,
458 struct tgsi_full_instruction *inst)
459 {
460 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
461 struct tgsi_full_dst_register *dst = &inst->Dst[0];
462 struct tgsi_full_src_register *src = &inst->Src[0];
463 struct tgsi_full_instruction new_inst;
464
465 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
466 /* FLR tmpA, src */
467 new_inst = tgsi_default_full_instruction();
468 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
469 new_inst.Instruction.NumDstRegs = 1;
470 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
471 new_inst.Instruction.NumSrcRegs = 1;
472 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
473 tctx->emit_instruction(tctx, &new_inst);
474
475 /* SUB dst, src, tmpA */
476 new_inst = tgsi_default_full_instruction();
477 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
478 new_inst.Instruction.NumDstRegs = 1;
479 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
480 new_inst.Instruction.NumSrcRegs = 2;
481 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
482 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
483 tctx->emit_instruction(tctx, &new_inst);
484 }
485 }
486
487 /* POW - Power
488 * dst.x = src0.x^{src1.x}
489 * dst.y = src0.x^{src1.x}
490 * dst.z = src0.x^{src1.x}
491 * dst.w = src0.x^{src1.x}
492 *
493 * ; needs: 1 tmp
494 * LG2 tmpA.x, src0.x
495 * MUL tmpA.x, src1.x, tmpA.x
496 * EX2 dst, tmpA.x
497 */
498 #define POW_GROW (10 - 4)
499 #define POW_TMP 1
500 static void
501 transform_pow(struct tgsi_transform_context *tctx,
502 struct tgsi_full_instruction *inst)
503 {
504 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
505 struct tgsi_full_dst_register *dst = &inst->Dst[0];
506 struct tgsi_full_src_register *src0 = &inst->Src[0];
507 struct tgsi_full_src_register *src1 = &inst->Src[1];
508 struct tgsi_full_instruction new_inst;
509
510 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
511 /* LG2 tmpA.x, src0.x */
512 new_inst = tgsi_default_full_instruction();
513 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
514 new_inst.Instruction.NumDstRegs = 1;
515 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
516 new_inst.Instruction.NumSrcRegs = 1;
517 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
518 tctx->emit_instruction(tctx, &new_inst);
519
520 /* MUL tmpA.x, src1.x, tmpA.x */
521 new_inst = tgsi_default_full_instruction();
522 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
523 new_inst.Instruction.NumDstRegs = 1;
524 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
525 new_inst.Instruction.NumSrcRegs = 2;
526 reg_src(&new_inst.Src[0], src1, SWIZ(X,_,_,_));
527 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,_,_,_));
528 tctx->emit_instruction(tctx, &new_inst);
529
530 /* EX2 dst, tmpA.x */
531 new_inst = tgsi_default_full_instruction();
532 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
533 new_inst.Instruction.NumDstRegs = 1;
534 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
535 new_inst.Instruction.NumSrcRegs = 1;
536 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
537 tctx->emit_instruction(tctx, &new_inst);
538 }
539 }
540
541 /* LIT - Light Coefficients
542 * dst.x = 1.0
543 * dst.y = max(src.x, 0.0)
544 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
545 * dst.w = 1.0
546 *
547 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
548 * MAX tmpA.xy, src.xy, imm{0.0}
549 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
550 * LG2 tmpA.y, tmpA.y
551 * MUL tmpA.y, tmpA.z, tmpA.y
552 * EX2 tmpA.y, tmpA.y
553 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
554 * MOV dst.yz, tmpA.xy
555 * MOV dst.xw, imm{1.0}
556 */
557 #define LIT_GROW (30 - 3)
558 #define LIT_TMP 1
559 static void
560 transform_lit(struct tgsi_transform_context *tctx,
561 struct tgsi_full_instruction *inst)
562 {
563 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
564 struct tgsi_full_dst_register *dst = &inst->Dst[0];
565 struct tgsi_full_src_register *src = &inst->Src[0];
566 struct tgsi_full_instruction new_inst;
567
568 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
569 /* MAX tmpA.xy, src.xy, imm{0.0} */
570 new_inst = tgsi_default_full_instruction();
571 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
572 new_inst.Instruction.NumDstRegs = 1;
573 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
574 new_inst.Instruction.NumSrcRegs = 2;
575 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,_,_));
576 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X,X,_,_));
577 tctx->emit_instruction(tctx, &new_inst);
578
579 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
580 new_inst = tgsi_default_full_instruction();
581 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
582 new_inst.Instruction.NumDstRegs = 1;
583 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
584 new_inst.Instruction.NumSrcRegs = 3;
585 reg_src(&new_inst.Src[0], src, SWIZ(_,_,W,_));
586 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_,_,Z,_));
587 new_inst.Src[1].Register.Negate = true;
588 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,_,Z,_));
589 tctx->emit_instruction(tctx, &new_inst);
590
591 /* LG2 tmpA.y, tmpA.y */
592 new_inst = tgsi_default_full_instruction();
593 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
594 new_inst.Instruction.NumDstRegs = 1;
595 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
596 new_inst.Instruction.NumSrcRegs = 1;
597 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
598 tctx->emit_instruction(tctx, &new_inst);
599
600 /* MUL tmpA.y, tmpA.z, tmpA.y */
601 new_inst = tgsi_default_full_instruction();
602 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
603 new_inst.Instruction.NumDstRegs = 1;
604 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
605 new_inst.Instruction.NumSrcRegs = 2;
606 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
607 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
608 tctx->emit_instruction(tctx, &new_inst);
609
610 /* EX2 tmpA.y, tmpA.y */
611 new_inst = tgsi_default_full_instruction();
612 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
613 new_inst.Instruction.NumDstRegs = 1;
614 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
615 new_inst.Instruction.NumSrcRegs = 1;
616 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
617 tctx->emit_instruction(tctx, &new_inst);
618
619 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
620 new_inst = tgsi_default_full_instruction();
621 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
622 new_inst.Instruction.NumDstRegs = 1;
623 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
624 new_inst.Instruction.NumSrcRegs = 3;
625 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
626 new_inst.Src[0].Register.Negate = true;
627 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
628 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,X,_,_));
629 tctx->emit_instruction(tctx, &new_inst);
630
631 /* MOV dst.yz, tmpA.xy */
632 new_inst = tgsi_default_full_instruction();
633 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
634 new_inst.Instruction.NumDstRegs = 1;
635 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
636 new_inst.Instruction.NumSrcRegs = 1;
637 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,Y,_));
638 tctx->emit_instruction(tctx, &new_inst);
639 }
640
641 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
642 /* MOV dst.xw, imm{1.0} */
643 new_inst = tgsi_default_full_instruction();
644 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
645 new_inst.Instruction.NumDstRegs = 1;
646 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
647 new_inst.Instruction.NumSrcRegs = 1;
648 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,Y));
649 tctx->emit_instruction(tctx, &new_inst);
650 }
651 }
652
653 /* EXP - Approximate Exponential Base 2
654 * dst.x = 2^{\lfloor src.x\rfloor}
655 * dst.y = src.x - \lfloor src.x\rfloor
656 * dst.z = 2^{src.x}
657 * dst.w = 1.0
658 *
659 * ; needs: 1 tmp, imm{1.0}
660 * FLR tmpA.x, src.x
661 * EX2 tmpA.y, src.x
662 * SUB dst.y, src.x, tmpA.x
663 * EX2 dst.x, tmpA.x
664 * MOV dst.z, tmpA.y
665 * MOV dst.w, imm{1.0}
666 */
667 #define EXP_GROW (19 - 3)
668 #define EXP_TMP 1
669 static void
670 transform_exp(struct tgsi_transform_context *tctx,
671 struct tgsi_full_instruction *inst)
672 {
673 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
674 struct tgsi_full_dst_register *dst = &inst->Dst[0];
675 struct tgsi_full_src_register *src = &inst->Src[0];
676 struct tgsi_full_instruction new_inst;
677
678 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
679 /* FLR tmpA.x, src.x */
680 new_inst = tgsi_default_full_instruction();
681 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
682 new_inst.Instruction.NumDstRegs = 1;
683 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
684 new_inst.Instruction.NumSrcRegs = 1;
685 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
686 tctx->emit_instruction(tctx, &new_inst);
687 }
688
689 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
690 /* EX2 tmpA.y, src.x */
691 new_inst = tgsi_default_full_instruction();
692 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
693 new_inst.Instruction.NumDstRegs = 1;
694 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
695 new_inst.Instruction.NumSrcRegs = 1;
696 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
697 tctx->emit_instruction(tctx, &new_inst);
698 }
699
700 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
701 /* SUB dst.y, src.x, tmpA.x */
702 new_inst = tgsi_default_full_instruction();
703 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
704 new_inst.Instruction.NumDstRegs = 1;
705 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
706 new_inst.Instruction.NumSrcRegs = 2;
707 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
708 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,X,_,_));
709 tctx->emit_instruction(tctx, &new_inst);
710 }
711
712 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
713 /* EX2 dst.x, tmpA.x */
714 new_inst = tgsi_default_full_instruction();
715 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
716 new_inst.Instruction.NumDstRegs = 1;
717 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
718 new_inst.Instruction.NumSrcRegs = 1;
719 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
720 tctx->emit_instruction(tctx, &new_inst);
721 }
722
723 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
724 /* MOV dst.z, tmpA.y */
725 new_inst = tgsi_default_full_instruction();
726 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
727 new_inst.Instruction.NumDstRegs = 1;
728 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
729 new_inst.Instruction.NumSrcRegs = 1;
730 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,_,Y,_));
731 tctx->emit_instruction(tctx, &new_inst);
732 }
733
734 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
735 /* MOV dst.w, imm{1.0} */
736 new_inst = tgsi_default_full_instruction();
737 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
738 new_inst.Instruction.NumDstRegs = 1;
739 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
740 new_inst.Instruction.NumSrcRegs = 1;
741 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
742 tctx->emit_instruction(tctx, &new_inst);
743 }
744 }
745
746 /* LOG - Approximate Logarithm Base 2
747 * dst.x = \lfloor\log_2{|src.x|}\rfloor
748 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
749 * dst.z = \log_2{|src.x|}
750 * dst.w = 1.0
751 *
752 * ; needs: 1 tmp, imm{1.0}
753 * LG2 tmpA.x, |src.x|
754 * FLR tmpA.y, tmpA.x
755 * EX2 tmpA.z, tmpA.y
756 * RCP tmpA.z, tmpA.z
757 * MUL dst.y, |src.x|, tmpA.z
758 * MOV dst.xz, tmpA.yx
759 * MOV dst.w, imm{1.0}
760 */
761 #define LOG_GROW (25 - 3)
762 #define LOG_TMP 1
763 static void
764 transform_log(struct tgsi_transform_context *tctx,
765 struct tgsi_full_instruction *inst)
766 {
767 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
768 struct tgsi_full_dst_register *dst = &inst->Dst[0];
769 struct tgsi_full_src_register *src = &inst->Src[0];
770 struct tgsi_full_instruction new_inst;
771
772 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
773 /* LG2 tmpA.x, |src.x| */
774 new_inst = tgsi_default_full_instruction();
775 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
776 new_inst.Instruction.NumDstRegs = 1;
777 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
778 new_inst.Instruction.NumSrcRegs = 1;
779 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
780 new_inst.Src[0].Register.Absolute = true;
781 tctx->emit_instruction(tctx, &new_inst);
782 }
783
784 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
785 /* FLR tmpA.y, tmpA.x */
786 new_inst = tgsi_default_full_instruction();
787 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
788 new_inst.Instruction.NumDstRegs = 1;
789 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
790 new_inst.Instruction.NumSrcRegs = 1;
791 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,_,_));
792 tctx->emit_instruction(tctx, &new_inst);
793 }
794
795 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
796 /* EX2 tmpA.z, tmpA.y */
797 new_inst = tgsi_default_full_instruction();
798 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
799 new_inst.Instruction.NumDstRegs = 1;
800 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
801 new_inst.Instruction.NumSrcRegs = 1;
802 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
803 tctx->emit_instruction(tctx, &new_inst);
804
805 /* RCP tmpA.z, tmpA.z */
806 new_inst = tgsi_default_full_instruction();
807 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
808 new_inst.Instruction.NumDstRegs = 1;
809 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
810 new_inst.Instruction.NumSrcRegs = 1;
811 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z,_,_,_));
812 tctx->emit_instruction(tctx, &new_inst);
813
814 /* MUL dst.y, |src.x|, tmpA.z */
815 new_inst = tgsi_default_full_instruction();
816 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
817 new_inst.Instruction.NumDstRegs = 1;
818 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
819 new_inst.Instruction.NumSrcRegs = 2;
820 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
821 new_inst.Src[0].Register.Absolute = true;
822 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
823 tctx->emit_instruction(tctx, &new_inst);
824 }
825
826 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
827 /* MOV dst.xz, tmpA.yx */
828 new_inst = tgsi_default_full_instruction();
829 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
830 new_inst.Instruction.NumDstRegs = 1;
831 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
832 new_inst.Instruction.NumSrcRegs = 1;
833 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,X,_));
834 tctx->emit_instruction(tctx, &new_inst);
835 }
836
837 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
838 /* MOV dst.w, imm{1.0} */
839 new_inst = tgsi_default_full_instruction();
840 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
841 new_inst.Instruction.NumDstRegs = 1;
842 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
843 new_inst.Instruction.NumSrcRegs = 1;
844 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
845 tctx->emit_instruction(tctx, &new_inst);
846 }
847 }
848
849 /* DP4 - 4-component Dot Product
850 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
851 *
852 * DP3 - 3-component Dot Product
853 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
854 *
855 * DPH - Homogeneous Dot Product
856 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
857 *
858 * DP2 - 2-component Dot Product
859 * dst = src0.x \times src1.x + src0.y \times src1.y
860 *
861 * DP2A - 2-component Dot Product And Add
862 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
863 *
864 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
865 * operations, which is what you'd prefer for a ISA that is natively
866 * scalar. Probably a native vector ISA would at least already have
867 * DP4/DP3 instructions, but perhaps there is room for an alternative
868 * translation for DPH/DP2/DP2A using vector instructions.
869 *
870 * ; needs: 1 tmp
871 * MUL tmpA.x, src0.x, src1.x
872 * MAD tmpA.x, src0.y, src1.y, tmpA.x
873 * if (DPH || DP3 || DP4) {
874 * MAD tmpA.x, src0.z, src1.z, tmpA.x
875 * if (DPH) {
876 * ADD tmpA.x, src1.w, tmpA.x
877 * } else if (DP4) {
878 * MAD tmpA.x, src0.w, src1.w, tmpA.x
879 * }
880 * } else if (DP2A) {
881 * ADD tmpA.x, src2.x, tmpA.x
882 * }
883 * ; fixup last instruction to replicate into dst
884 */
885 #define DP4_GROW (19 - 4)
886 #define DP3_GROW (14 - 4)
887 #define DPH_GROW (18 - 4)
888 #define DP2_GROW ( 9 - 4)
889 #define DP2A_GROW (13 - 4)
890 #define DOTP_TMP 1
891 static void
892 transform_dotp(struct tgsi_transform_context *tctx,
893 struct tgsi_full_instruction *inst)
894 {
895 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
896 struct tgsi_full_dst_register *dst = &inst->Dst[0];
897 struct tgsi_full_src_register *src0 = &inst->Src[0];
898 struct tgsi_full_src_register *src1 = &inst->Src[1];
899 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
900 struct tgsi_full_instruction new_inst;
901 unsigned opcode = inst->Instruction.Opcode;
902
903 /* NOTE: any potential last instruction must replicate src on all
904 * components (since it could be re-written to write to final dst)
905 */
906
907 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
908 /* MUL tmpA.x, src0.x, src1.x */
909 new_inst = tgsi_default_full_instruction();
910 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
911 new_inst.Instruction.NumDstRegs = 1;
912 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
913 new_inst.Instruction.NumSrcRegs = 2;
914 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
915 reg_src(&new_inst.Src[1], src1, SWIZ(X,_,_,_));
916 tctx->emit_instruction(tctx, &new_inst);
917
918 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
919 new_inst = tgsi_default_full_instruction();
920 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
921 new_inst.Instruction.NumDstRegs = 1;
922 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
923 new_inst.Instruction.NumSrcRegs = 3;
924 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Y,Y,Y));
925 reg_src(&new_inst.Src[1], src1, SWIZ(Y,Y,Y,Y));
926 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
927
928 if ((opcode == TGSI_OPCODE_DPH) ||
929 (opcode == TGSI_OPCODE_DP3) ||
930 (opcode == TGSI_OPCODE_DP4)) {
931 tctx->emit_instruction(tctx, &new_inst);
932
933 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
934 new_inst = tgsi_default_full_instruction();
935 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
936 new_inst.Instruction.NumDstRegs = 1;
937 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
938 new_inst.Instruction.NumSrcRegs = 3;
939 reg_src(&new_inst.Src[0], src0, SWIZ(Z,Z,Z,Z));
940 reg_src(&new_inst.Src[1], src1, SWIZ(Z,Z,Z,Z));
941 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
942
943 if (opcode == TGSI_OPCODE_DPH) {
944 tctx->emit_instruction(tctx, &new_inst);
945
946 /* ADD tmpA.x, src1.w, tmpA.x */
947 new_inst = tgsi_default_full_instruction();
948 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
949 new_inst.Instruction.NumDstRegs = 1;
950 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
951 new_inst.Instruction.NumSrcRegs = 2;
952 reg_src(&new_inst.Src[0], src1, SWIZ(W,W,W,W));
953 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
954 } else if (opcode == TGSI_OPCODE_DP4) {
955 tctx->emit_instruction(tctx, &new_inst);
956
957 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
958 new_inst = tgsi_default_full_instruction();
959 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
960 new_inst.Instruction.NumDstRegs = 1;
961 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
962 new_inst.Instruction.NumSrcRegs = 3;
963 reg_src(&new_inst.Src[0], src0, SWIZ(W,W,W,W));
964 reg_src(&new_inst.Src[1], src1, SWIZ(W,W,W,W));
965 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
966 }
967 } else if (opcode == TGSI_OPCODE_DP2A) {
968 tctx->emit_instruction(tctx, &new_inst);
969
970 /* ADD tmpA.x, src2.x, tmpA.x */
971 new_inst = tgsi_default_full_instruction();
972 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
973 new_inst.Instruction.NumDstRegs = 1;
974 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
975 new_inst.Instruction.NumSrcRegs = 2;
976 reg_src(&new_inst.Src[0], src2, SWIZ(X,X,X,X));
977 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
978 }
979
980 /* fixup last instruction to write to dst: */
981 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
982
983 tctx->emit_instruction(tctx, &new_inst);
984 }
985 }
986
987 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
988 * in the case of TXP, the clamping must happen *after* projection, so
989 * we need to lower TXP to TEX.
990 *
991 * MOV tmpA, src0
992 * if (opc == TXP) {
993 * ; do perspective division manually before clamping:
994 * RCP tmpB, tmpA.w
995 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
996 * opc = TEX;
997 * }
998 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
999 * <opc> dst, tmpA, ...
1000 */
1001 #define SAMP_GROW (13)
1002 #define SAMP_TMP 2
1003 static int
1004 transform_samp(struct tgsi_transform_context *tctx,
1005 struct tgsi_full_instruction *inst)
1006 {
1007 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1008 struct tgsi_full_src_register *coord = &inst->Src[0];
1009 struct tgsi_full_src_register *samp;
1010 struct tgsi_full_instruction new_inst;
1011 /* mask is clamped coords, pmask is all coords (for projection): */
1012 unsigned mask = 0, pmask = 0, smask;
1013 unsigned opcode = inst->Instruction.Opcode;
1014
1015 if (opcode == TGSI_OPCODE_TXB2) {
1016 samp = &inst->Src[2];
1017 } else {
1018 samp = &inst->Src[1];
1019 }
1020
1021 /* convert sampler # to bitmask to test: */
1022 smask = 1 << samp->Register.Index;
1023
1024 /* check if we actually need to lower this one: */
1025 if (!(ctx->saturate & smask))
1026 return -1;
1027
1028 /* figure out which coordinates need saturating:
1029 * - RECT textures should not get saturated
1030 * - array index coords should not get saturated
1031 */
1032 switch (inst->Texture.Texture) {
1033 case TGSI_TEXTURE_3D:
1034 case TGSI_TEXTURE_CUBE:
1035 case TGSI_TEXTURE_CUBE_ARRAY:
1036 case TGSI_TEXTURE_SHADOWCUBE:
1037 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1038 if (ctx->config->saturate_r & smask)
1039 mask |= TGSI_WRITEMASK_Z;
1040 pmask |= TGSI_WRITEMASK_Z;
1041 /* fallthrough */
1042
1043 case TGSI_TEXTURE_2D:
1044 case TGSI_TEXTURE_2D_ARRAY:
1045 case TGSI_TEXTURE_SHADOW2D:
1046 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1047 case TGSI_TEXTURE_2D_MSAA:
1048 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1049 if (ctx->config->saturate_t & smask)
1050 mask |= TGSI_WRITEMASK_Y;
1051 pmask |= TGSI_WRITEMASK_Y;
1052 /* fallthrough */
1053
1054 case TGSI_TEXTURE_1D:
1055 case TGSI_TEXTURE_1D_ARRAY:
1056 case TGSI_TEXTURE_SHADOW1D:
1057 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1058 if (ctx->config->saturate_s & smask)
1059 mask |= TGSI_WRITEMASK_X;
1060 pmask |= TGSI_WRITEMASK_X;
1061 break;
1062
1063 /* TODO: I think we should ignore these?
1064 case TGSI_TEXTURE_RECT:
1065 case TGSI_TEXTURE_SHADOWRECT:
1066 */
1067 }
1068
1069 /* sanity check.. driver could be asking to saturate a non-
1070 * existent coordinate component:
1071 */
1072 if (!mask)
1073 return -1;
1074
1075 /* MOV tmpA, src0 */
1076 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1077
1078 /* This is a bit sad.. we need to clamp *after* the coords
1079 * are projected, which means lowering TXP to TEX and doing
1080 * the projection ourself. But since I haven't figured out
1081 * how to make the lowering code deliver an electric shock
1082 * to anyone using GL_CLAMP, we must do this instead:
1083 */
1084 if (opcode == TGSI_OPCODE_TXP) {
1085 /* RCP tmpB.x tmpA.w */
1086 new_inst = tgsi_default_full_instruction();
1087 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1088 new_inst.Instruction.NumDstRegs = 1;
1089 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1090 new_inst.Instruction.NumSrcRegs = 1;
1091 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W,_,_,_));
1092 tctx->emit_instruction(tctx, &new_inst);
1093
1094 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1095 new_inst = tgsi_default_full_instruction();
1096 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1097 new_inst.Instruction.NumDstRegs = 1;
1098 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1099 new_inst.Instruction.NumSrcRegs = 2;
1100 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
1101 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,X,X,X));
1102 tctx->emit_instruction(tctx, &new_inst);
1103
1104 opcode = TGSI_OPCODE_TEX;
1105 }
1106
1107 /* MOV_SAT tmpA.<mask>, tmpA */
1108 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
1109 TGSI_SAT_ZERO_ONE);
1110
1111 /* modify the texture samp instruction to take fixed up coord: */
1112 new_inst = *inst;
1113 new_inst.Instruction.Opcode = opcode;
1114 new_inst.Src[0] = ctx->tmp[A].src;
1115 tctx->emit_instruction(tctx, &new_inst);
1116
1117 return 0;
1118 }
1119
1120 /* Two-sided color emulation:
1121 * For each COLOR input, create a corresponding BCOLOR input, plus
1122 * CMP instruction to select front or back color based on FACE
1123 */
1124 #define TWOSIDE_GROW(n) ( \
1125 2 + /* FACE */ \
1126 ((n) * 2) + /* IN[] BCOLOR[n] */ \
1127 ((n) * 1) + /* TEMP[] */ \
1128 ((n) * 5) /* CMP instr */ \
1129 )
1130
1131 static void
1132 emit_twoside(struct tgsi_transform_context *tctx)
1133 {
1134 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1135 struct tgsi_shader_info *info = ctx->info;
1136 struct tgsi_full_declaration decl;
1137 struct tgsi_full_instruction new_inst;
1138 unsigned inbase, tmpbase;
1139 int i;
1140
1141 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1142 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1143
1144 /* additional inputs for BCOLOR's */
1145 for (i = 0; i < ctx->two_side_colors; i++) {
1146 decl = tgsi_default_full_declaration();
1147 decl.Declaration.File = TGSI_FILE_INPUT;
1148 decl.Declaration.Semantic = true;
1149 decl.Range.First = decl.Range.Last = inbase + i;
1150 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1151 decl.Semantic.Index =
1152 info->input_semantic_index[ctx->two_side_idx[i]];
1153 tctx->emit_declaration(tctx, &decl);
1154 }
1155
1156 /* additional input for FACE */
1157 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1158 decl = tgsi_default_full_declaration();
1159 decl.Declaration.File = TGSI_FILE_INPUT;
1160 decl.Declaration.Semantic = true;
1161 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1162 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1163 decl.Semantic.Index = 0;
1164 tctx->emit_declaration(tctx, &decl);
1165
1166 ctx->face_idx = decl.Range.First;
1167 }
1168
1169 /* additional temps for COLOR/BCOLOR selection: */
1170 for (i = 0; i < ctx->two_side_colors; i++) {
1171 decl = tgsi_default_full_declaration();
1172 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1173 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1174 tctx->emit_declaration(tctx, &decl);
1175 }
1176
1177 /* and finally additional instructions to select COLOR/BCOLOR: */
1178 for (i = 0; i < ctx->two_side_colors; i++) {
1179 new_inst = tgsi_default_full_instruction();
1180 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1181
1182 new_inst.Instruction.NumDstRegs = 1;
1183 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1184 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1185 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1186
1187 new_inst.Instruction.NumSrcRegs = 3;
1188 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1189 new_inst.Src[0].Register.Index = ctx->face_idx;
1190 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1191 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1192 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1193 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1194 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1195 new_inst.Src[1].Register.Index = inbase + i;
1196 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1197 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1198 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1199 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1200 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1201 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1202 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1203 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1204 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1205 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1206
1207 tctx->emit_instruction(tctx, &new_inst);
1208 }
1209 }
1210
1211 static void
1212 emit_decls(struct tgsi_transform_context *tctx)
1213 {
1214 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1215 struct tgsi_shader_info *info = ctx->info;
1216 struct tgsi_full_declaration decl;
1217 struct tgsi_full_immediate immed;
1218 unsigned tmpbase;
1219 int i;
1220
1221 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1222
1223 ctx->color_base = tmpbase + ctx->numtmp;
1224
1225 /* declare immediate: */
1226 immed = tgsi_default_full_immediate();
1227 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1228 immed.u[0].Float = 0.0;
1229 immed.u[1].Float = 1.0;
1230 immed.u[2].Float = 128.0;
1231 immed.u[3].Float = 0.0;
1232 tctx->emit_immediate(tctx, &immed);
1233
1234 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1235 ctx->imm.Register.Index = info->immediate_count;
1236 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1237 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1238 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1239 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1240
1241 /* declare temp regs: */
1242 for (i = 0; i < ctx->numtmp; i++) {
1243 decl = tgsi_default_full_declaration();
1244 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1245 decl.Range.First = decl.Range.Last = tmpbase + i;
1246 tctx->emit_declaration(tctx, &decl);
1247
1248 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1249 ctx->tmp[i].src.Register.Index = tmpbase + i;
1250 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1251 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1252 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1253 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1254
1255 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1256 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1257 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1258 }
1259
1260 if (ctx->two_side_colors)
1261 emit_twoside(tctx);
1262 }
1263
1264 static void
1265 rename_color_inputs(struct fd_lowering_context *ctx,
1266 struct tgsi_full_instruction *inst)
1267 {
1268 unsigned i, j;
1269 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1270 struct tgsi_src_register *src = &inst->Src[i].Register;
1271 if (src->File == TGSI_FILE_INPUT) {
1272 for (j = 0; j < ctx->two_side_colors; j++) {
1273 if (src->Index == ctx->two_side_idx[j]) {
1274 src->File = TGSI_FILE_TEMPORARY;
1275 src->Index = ctx->color_base + j;
1276 break;
1277 }
1278 }
1279 }
1280 }
1281
1282 }
1283
1284 static void
1285 transform_instr(struct tgsi_transform_context *tctx,
1286 struct tgsi_full_instruction *inst)
1287 {
1288 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1289
1290 if (!ctx->emitted_decls) {
1291 emit_decls(tctx);
1292 ctx->emitted_decls = 1;
1293 }
1294
1295 /* if emulating two-sided-color, we need to re-write some
1296 * src registers:
1297 */
1298 if (ctx->two_side_colors)
1299 rename_color_inputs(ctx, inst);
1300
1301 switch (inst->Instruction.Opcode) {
1302 case TGSI_OPCODE_DST:
1303 if (!ctx->config->lower_DST)
1304 goto skip;
1305 transform_dst(tctx, inst);
1306 break;
1307 case TGSI_OPCODE_XPD:
1308 if (!ctx->config->lower_XPD)
1309 goto skip;
1310 transform_xpd(tctx, inst);
1311 break;
1312 case TGSI_OPCODE_SCS:
1313 if (!ctx->config->lower_SCS)
1314 goto skip;
1315 transform_scs(tctx, inst);
1316 break;
1317 case TGSI_OPCODE_LRP:
1318 if (!ctx->config->lower_LRP)
1319 goto skip;
1320 transform_lrp(tctx, inst);
1321 break;
1322 case TGSI_OPCODE_FRC:
1323 if (!ctx->config->lower_FRC)
1324 goto skip;
1325 transform_frc(tctx, inst);
1326 break;
1327 case TGSI_OPCODE_POW:
1328 if (!ctx->config->lower_POW)
1329 goto skip;
1330 transform_pow(tctx, inst);
1331 break;
1332 case TGSI_OPCODE_LIT:
1333 if (!ctx->config->lower_LIT)
1334 goto skip;
1335 transform_lit(tctx, inst);
1336 break;
1337 case TGSI_OPCODE_EXP:
1338 if (!ctx->config->lower_EXP)
1339 goto skip;
1340 transform_exp(tctx, inst);
1341 break;
1342 case TGSI_OPCODE_LOG:
1343 if (!ctx->config->lower_LOG)
1344 goto skip;
1345 transform_log(tctx, inst);
1346 break;
1347 case TGSI_OPCODE_DP4:
1348 if (!ctx->config->lower_DP4)
1349 goto skip;
1350 transform_dotp(tctx, inst);
1351 break;
1352 case TGSI_OPCODE_DP3:
1353 if (!ctx->config->lower_DP3)
1354 goto skip;
1355 transform_dotp(tctx, inst);
1356 break;
1357 case TGSI_OPCODE_DPH:
1358 if (!ctx->config->lower_DPH)
1359 goto skip;
1360 transform_dotp(tctx, inst);
1361 break;
1362 case TGSI_OPCODE_DP2:
1363 if (!ctx->config->lower_DP2)
1364 goto skip;
1365 transform_dotp(tctx, inst);
1366 break;
1367 case TGSI_OPCODE_DP2A:
1368 if (!ctx->config->lower_DP2A)
1369 goto skip;
1370 transform_dotp(tctx, inst);
1371 break;
1372 case TGSI_OPCODE_TEX:
1373 case TGSI_OPCODE_TXP:
1374 case TGSI_OPCODE_TXB:
1375 case TGSI_OPCODE_TXB2:
1376 case TGSI_OPCODE_TXL:
1377 if (transform_samp(tctx, inst))
1378 goto skip;
1379 break;
1380 default:
1381 skip:
1382 tctx->emit_instruction(tctx, inst);
1383 break;
1384 }
1385 }
1386
1387 /* returns NULL if no lowering required, else returns the new
1388 * tokens (which caller is required to free()). In either case
1389 * returns the current info.
1390 */
1391 const struct tgsi_token *
1392 fd_transform_lowering(const struct fd_lowering_config *config,
1393 const struct tgsi_token *tokens,
1394 struct tgsi_shader_info *info)
1395 {
1396 struct fd_lowering_context ctx;
1397 struct tgsi_token *newtoks;
1398 int newlen, numtmp;
1399
1400 /* sanity check in case limit is ever increased: */
1401 assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1402
1403 memset(&ctx, 0, sizeof(ctx));
1404 ctx.base.transform_instruction = transform_instr;
1405 ctx.info = info;
1406 ctx.config = config;
1407
1408 tgsi_scan_shader(tokens, info);
1409
1410 /* if we are adding fragment shader support to emulate two-sided
1411 * color, then figure out the number of additional inputs we need
1412 * to create for BCOLOR's..
1413 */
1414 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1415 config->color_two_side) {
1416 int i;
1417 ctx.face_idx = -1;
1418 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1419 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1420 ctx.two_side_idx[ctx.two_side_colors++] = i;
1421 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1422 ctx.face_idx = i;
1423 }
1424 }
1425
1426 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1427
1428 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1429 /* if there are no instructions to lower, then we are done: */
1430 if (!(OPCS(DST) ||
1431 OPCS(XPD) ||
1432 OPCS(SCS) ||
1433 OPCS(LRP) ||
1434 OPCS(FRC) ||
1435 OPCS(POW) ||
1436 OPCS(LIT) ||
1437 OPCS(EXP) ||
1438 OPCS(LOG) ||
1439 OPCS(DP4) ||
1440 OPCS(DP3) ||
1441 OPCS(DPH) ||
1442 OPCS(DP2) ||
1443 OPCS(DP2A) ||
1444 ctx.two_side_colors ||
1445 ctx.saturate))
1446 return NULL;
1447
1448 #if 0 /* debug */
1449 _debug_printf("BEFORE:");
1450 tgsi_dump(tokens, 0);
1451 #endif
1452
1453 numtmp = 0;
1454 newlen = tgsi_num_tokens(tokens);
1455 if (OPCS(DST)) {
1456 newlen += DST_GROW * OPCS(DST);
1457 numtmp = MAX2(numtmp, DST_TMP);
1458 }
1459 if (OPCS(XPD)) {
1460 newlen += XPD_GROW * OPCS(XPD);
1461 numtmp = MAX2(numtmp, XPD_TMP);
1462 }
1463 if (OPCS(SCS)) {
1464 newlen += SCS_GROW * OPCS(SCS);
1465 numtmp = MAX2(numtmp, SCS_TMP);
1466 }
1467 if (OPCS(LRP)) {
1468 newlen += LRP_GROW * OPCS(LRP);
1469 numtmp = MAX2(numtmp, LRP_TMP);
1470 }
1471 if (OPCS(FRC)) {
1472 newlen += FRC_GROW * OPCS(FRC);
1473 numtmp = MAX2(numtmp, FRC_TMP);
1474 }
1475 if (OPCS(POW)) {
1476 newlen += POW_GROW * OPCS(POW);
1477 numtmp = MAX2(numtmp, POW_TMP);
1478 }
1479 if (OPCS(LIT)) {
1480 newlen += LIT_GROW * OPCS(LIT);
1481 numtmp = MAX2(numtmp, LIT_TMP);
1482 }
1483 if (OPCS(EXP)) {
1484 newlen += EXP_GROW * OPCS(EXP);
1485 numtmp = MAX2(numtmp, EXP_TMP);
1486 }
1487 if (OPCS(LOG)) {
1488 newlen += LOG_GROW * OPCS(LOG);
1489 numtmp = MAX2(numtmp, LOG_TMP);
1490 }
1491 if (OPCS(DP4)) {
1492 newlen += DP4_GROW * OPCS(DP4);
1493 numtmp = MAX2(numtmp, DOTP_TMP);
1494 }
1495 if (OPCS(DP3)) {
1496 newlen += DP3_GROW * OPCS(DP3);
1497 numtmp = MAX2(numtmp, DOTP_TMP);
1498 }
1499 if (OPCS(DPH)) {
1500 newlen += DPH_GROW * OPCS(DPH);
1501 numtmp = MAX2(numtmp, DOTP_TMP);
1502 }
1503 if (OPCS(DP2)) {
1504 newlen += DP2_GROW * OPCS(DP2);
1505 numtmp = MAX2(numtmp, DOTP_TMP);
1506 }
1507 if (OPCS(DP2A)) {
1508 newlen += DP2A_GROW * OPCS(DP2A);
1509 numtmp = MAX2(numtmp, DOTP_TMP);
1510 }
1511 if (ctx.saturate) {
1512 int n = info->opcode_count[TGSI_OPCODE_TEX] +
1513 info->opcode_count[TGSI_OPCODE_TXP] +
1514 info->opcode_count[TGSI_OPCODE_TXB] +
1515 info->opcode_count[TGSI_OPCODE_TXB2] +
1516 info->opcode_count[TGSI_OPCODE_TXL];
1517 newlen += SAMP_GROW * n;
1518 numtmp = MAX2(numtmp, SAMP_TMP);
1519 }
1520
1521 /* specifically don't include two_side_colors temps in the count: */
1522 ctx.numtmp = numtmp;
1523
1524 if (ctx.two_side_colors) {
1525 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1526 /* note: we permanently consume temp regs, re-writing references
1527 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1528 * instruction that selects which varying to use):
1529 */
1530 numtmp += ctx.two_side_colors;
1531 }
1532
1533 newlen += 2 * numtmp;
1534 newlen += 5; /* immediate */
1535
1536 newtoks = tgsi_alloc_tokens(newlen);
1537 if (!newtoks)
1538 return NULL;
1539
1540 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1541
1542 tgsi_scan_shader(newtoks, info);
1543
1544 #if 0 /* debug */
1545 _debug_printf("AFTER:");
1546 tgsi_dump(newtoks, 0);
1547 #endif
1548
1549 return newtoks;
1550 }