r300/compiler: remove useless check
[mesa.git] / src / gallium / drivers / freedreno / freedreno_lowering.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
32
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
35
36 #include "freedreno_lowering.h"
37
38 struct fd_lowering_context {
39 struct tgsi_transform_context base;
40 const struct fd_lowering_config *config;
41 struct tgsi_shader_info *info;
42 unsigned two_side_colors;
43 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
44 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
45 int face_idx;
46 unsigned numtmp;
47 struct {
48 struct tgsi_full_src_register src;
49 struct tgsi_full_dst_register dst;
50 } tmp[2];
51 #define A 0
52 #define B 1
53 struct tgsi_full_src_register imm;
54 int emitted_decls;
55 unsigned saturate;
56 };
57
/*
 * Recover the lowering context from the generic transform context that
 * is passed to the transform callbacks (plain pointer cast).
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	struct fd_lowering_context *ctx = (struct fd_lowering_context *)tctx;

	return ctx;
}
63
64 /*
65 * Utility helpers:
66 */
67
68 static void
69 reg_dst(struct tgsi_full_dst_register *dst,
70 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
71 {
72 *dst = *orig_dst;
73 dst->Register.WriteMask &= wrmask;
74 assert(dst->Register.WriteMask);
75 }
76
77 static inline void
78 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
79 {
80 swiz[0] = src->SwizzleX;
81 swiz[1] = src->SwizzleY;
82 swiz[2] = src->SwizzleZ;
83 swiz[3] = src->SwizzleW;
84 }
85
86 static void
87 reg_src(struct tgsi_full_src_register *src,
88 const struct tgsi_full_src_register *orig_src,
89 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
90 {
91 unsigned swiz[4];
92 get_swiz(swiz, &orig_src->Register);
93 *src = *orig_src;
94 src->Register.SwizzleX = swiz[sx];
95 src->Register.SwizzleY = swiz[sy];
96 src->Register.SwizzleZ = swiz[sz];
97 src->Register.SwizzleW = swiz[sw];
98 }
99
100 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
101 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
102 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
103
104 /*
105 * if (dst.x aliases src.x) {
106 * MOV tmpA.x, src.x
107 * src = tmpA
108 * }
109 * COS dst.x, src.x
110 * SIN dst.y, src.x
111 * MOV dst.zw, imm{0.0, 1.0}
112 */
113 static bool
114 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
115 const struct tgsi_full_src_register *src, unsigned src_mask)
116 {
117 if ((dst->Register.File == src->Register.File) &&
118 (dst->Register.Index == src->Register.Index)) {
119 unsigned i, actual_mask = 0;
120 unsigned swiz[4];
121 get_swiz(swiz, &src->Register);
122 for (i = 0; i < 4; i++)
123 if (src_mask & (1 << i))
124 actual_mask |= (1 << swiz[i]);
125 if (actual_mask & dst_mask)
126 return true;
127 }
128 return false;
129 }
130
131 static void
132 create_mov(struct tgsi_transform_context *tctx,
133 const struct tgsi_full_dst_register *dst,
134 const struct tgsi_full_src_register *src,
135 unsigned mask, unsigned saturate)
136 {
137 struct tgsi_full_instruction new_inst;
138
139 new_inst = tgsi_default_full_instruction();
140 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
141 new_inst.Instruction.Saturate = saturate;
142 new_inst.Instruction.NumDstRegs = 1;
143 reg_dst(&new_inst.Dst[0], dst, mask);
144 new_inst.Instruction.NumSrcRegs = 1;
145 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
146 tctx->emit_instruction(tctx, &new_inst);
147 }
148
/* Token accounting used to size the output of a lowering.  The estimate
 * is deliberately pessimistic: an instruction that gets removed is
 * assumed to have had no ADDR[] (or anything else that adds tokens to
 * a src/dst), while every inserted instruction is assumed to have them.
 *
 *   OINST(nargs) - tokens given back by the old instruction:
 *       1 for the instruction, 1 for dst, 1 per source
 *
 *   NINST(nargs) - tokens consumed by a new instruction:
 *       1 for the instruction, 2 for dst, 2 per source
 */

#define OINST(nargs)  ((nargs) + 2)
#define NINST(nargs)  (2 * (nargs) + 3)
167
168 /*
169 * Lowering Translators:
170 */
171
172 /* DST - Distance Vector
173 * dst.x = 1.0
174 * dst.y = src0.y \times src1.y
175 * dst.z = src0.z
176 * dst.w = src1.w
177 *
178 * ; note: could be more clever and use just a single temp
179 * ; if I was clever enough to re-write the swizzles.
180 * ; needs: 2 tmp, imm{1.0}
181 * if (dst.y aliases src0.z) {
182 * MOV tmpA.yz, src0.yz
183 * src0 = tmpA
184 * }
185 * if (dst.yz aliases src1.w) {
186 * MOV tmpB.yw, src1.yw
187 * src1 = tmpB
188 * }
189 * MUL dst.y, src0.y, src1.y
190 * MOV dst.z, src0.z
191 * MOV dst.w, src1.w
192 * MOV dst.x, imm{1.0}
193 */
194 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
195 NINST(1) + NINST(1) - OINST(2))
196 #define DST_TMP 2
197 static void
198 transform_dst(struct tgsi_transform_context *tctx,
199 struct tgsi_full_instruction *inst)
200 {
201 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
202 struct tgsi_full_dst_register *dst = &inst->Dst[0];
203 struct tgsi_full_src_register *src0 = &inst->Src[0];
204 struct tgsi_full_src_register *src1 = &inst->Src[1];
205 struct tgsi_full_instruction new_inst;
206
207 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
208 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
209 src0 = &ctx->tmp[A].src;
210 }
211
212 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
213 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
214 src1 = &ctx->tmp[B].src;
215 }
216
217 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
218 /* MUL dst.y, src0.y, src1.y */
219 new_inst = tgsi_default_full_instruction();
220 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
221 new_inst.Instruction.NumDstRegs = 1;
222 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
223 new_inst.Instruction.NumSrcRegs = 2;
224 reg_src(&new_inst.Src[0], src0, SWIZ(_,Y,_,_));
225 reg_src(&new_inst.Src[1], src1, SWIZ(_,Y,_,_));
226 tctx->emit_instruction(tctx, &new_inst);
227 }
228
229 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
230 /* MOV dst.z, src0.z */
231 new_inst = tgsi_default_full_instruction();
232 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
233 new_inst.Instruction.NumDstRegs = 1;
234 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
235 new_inst.Instruction.NumSrcRegs = 1;
236 reg_src(&new_inst.Src[0], src0, SWIZ(_,_,Z,_));
237 tctx->emit_instruction(tctx, &new_inst);
238 }
239
240 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
241 /* MOV dst.w, src1.w */
242 new_inst = tgsi_default_full_instruction();
243 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
244 new_inst.Instruction.NumDstRegs = 1;
245 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
246 new_inst.Instruction.NumSrcRegs = 1;
247 reg_src(&new_inst.Src[0], src1, SWIZ(_,_,_,W));
248 tctx->emit_instruction(tctx, &new_inst);
249 }
250
251 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
252 /* MOV dst.x, imm{1.0} */
253 new_inst = tgsi_default_full_instruction();
254 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
255 new_inst.Instruction.NumDstRegs = 1;
256 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
257 new_inst.Instruction.NumSrcRegs = 1;
258 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,_));
259 tctx->emit_instruction(tctx, &new_inst);
260 }
261 }
262
263 /* XPD - Cross Product
264 * dst.x = src0.y \times src1.z - src1.y \times src0.z
265 * dst.y = src0.z \times src1.x - src1.z \times src0.x
266 * dst.z = src0.x \times src1.y - src1.x \times src0.y
267 * dst.w = 1.0
268 *
269 * ; needs: 2 tmp, imm{1.0}
270 * MUL tmpA.xyz, src0.yzx, src1.zxy
271 * MUL tmpB.xyz, src1.yzx, src0.zxy
272 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
273 * MOV dst.w, imm{1.0}
274 */
275 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
276 #define XPD_TMP 2
277 static void
278 transform_xpd(struct tgsi_transform_context *tctx,
279 struct tgsi_full_instruction *inst)
280 {
281 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
282 struct tgsi_full_dst_register *dst = &inst->Dst[0];
283 struct tgsi_full_src_register *src0 = &inst->Src[0];
284 struct tgsi_full_src_register *src1 = &inst->Src[1];
285 struct tgsi_full_instruction new_inst;
286
287 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
288 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
289 new_inst = tgsi_default_full_instruction();
290 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
291 new_inst.Instruction.NumDstRegs = 1;
292 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
293 new_inst.Instruction.NumSrcRegs = 2;
294 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Z,X,_));
295 reg_src(&new_inst.Src[1], src1, SWIZ(Z,X,Y,_));
296 tctx->emit_instruction(tctx, &new_inst);
297
298 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
299 new_inst = tgsi_default_full_instruction();
300 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
301 new_inst.Instruction.NumDstRegs = 1;
302 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
303 new_inst.Instruction.NumSrcRegs = 2;
304 reg_src(&new_inst.Src[0], src1, SWIZ(Y,Z,X,_));
305 reg_src(&new_inst.Src[1], src0, SWIZ(Z,X,Y,_));
306 tctx->emit_instruction(tctx, &new_inst);
307
308 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
309 new_inst = tgsi_default_full_instruction();
310 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
311 new_inst.Instruction.NumDstRegs = 1;
312 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
313 new_inst.Instruction.NumSrcRegs = 2;
314 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,_));
315 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,_));
316 tctx->emit_instruction(tctx, &new_inst);
317 }
318
319 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
320 /* MOV dst.w, imm{1.0} */
321 new_inst = tgsi_default_full_instruction();
322 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
323 new_inst.Instruction.NumDstRegs = 1;
324 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
325 new_inst.Instruction.NumSrcRegs = 1;
326 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
327 tctx->emit_instruction(tctx, &new_inst);
328 }
329 }
330
331 /* SCS - Sine Cosine
332 * dst.x = \cos{src.x}
333 * dst.y = \sin{src.x}
334 * dst.z = 0.0
335 * dst.w = 1.0
336 *
337 * ; needs: 1 tmp, imm{0.0, 1.0}
338 * if (dst.x aliases src.x) {
339 * MOV tmpA.x, src.x
340 * src = tmpA
341 * }
342 * COS dst.x, src.x
343 * SIN dst.y, src.x
344 * MOV dst.zw, imm{0.0, 1.0}
345 */
346 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
347 #define SCS_TMP 1
348 static void
349 transform_scs(struct tgsi_transform_context *tctx,
350 struct tgsi_full_instruction *inst)
351 {
352 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
353 struct tgsi_full_dst_register *dst = &inst->Dst[0];
354 struct tgsi_full_src_register *src = &inst->Src[0];
355 struct tgsi_full_instruction new_inst;
356
357 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
358 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
359 src = &ctx->tmp[A].src;
360 }
361
362 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
363 /* COS dst.x, src.x */
364 new_inst = tgsi_default_full_instruction();
365 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
366 new_inst.Instruction.NumDstRegs = 1;
367 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
368 new_inst.Instruction.NumSrcRegs = 1;
369 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
370 tctx->emit_instruction(tctx, &new_inst);
371 }
372
373 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
374 /* SIN dst.y, src.x */
375 new_inst = tgsi_default_full_instruction();
376 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
377 new_inst.Instruction.NumDstRegs = 1;
378 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
379 new_inst.Instruction.NumSrcRegs = 1;
380 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
381 tctx->emit_instruction(tctx, &new_inst);
382 }
383
384 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
385 /* MOV dst.zw, imm{0.0, 1.0} */
386 new_inst = tgsi_default_full_instruction();
387 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
388 new_inst.Instruction.NumDstRegs = 1;
389 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
390 new_inst.Instruction.NumSrcRegs = 1;
391 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,X,Y));
392 tctx->emit_instruction(tctx, &new_inst);
393 }
394 }
395
396 /* LRP - Linear Interpolate
397 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
398 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
399 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
400 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
401 *
402 * ; needs: 2 tmp, imm{1.0}
403 * MUL tmpA, src0, src1
404 * SUB tmpB, imm{1.0}, src0
405 * MUL tmpB, tmpB, src2
406 * ADD dst, tmpA, tmpB
407 */
408 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
409 #define LRP_TMP 2
410 static void
411 transform_lrp(struct tgsi_transform_context *tctx,
412 struct tgsi_full_instruction *inst)
413 {
414 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
415 struct tgsi_full_dst_register *dst = &inst->Dst[0];
416 struct tgsi_full_src_register *src0 = &inst->Src[0];
417 struct tgsi_full_src_register *src1 = &inst->Src[1];
418 struct tgsi_full_src_register *src2 = &inst->Src[2];
419 struct tgsi_full_instruction new_inst;
420
421 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
422 /* MUL tmpA, src0, src1 */
423 new_inst = tgsi_default_full_instruction();
424 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
425 new_inst.Instruction.NumDstRegs = 1;
426 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
427 new_inst.Instruction.NumSrcRegs = 2;
428 reg_src(&new_inst.Src[0], src0, SWIZ(X,Y,Z,W));
429 reg_src(&new_inst.Src[1], src1, SWIZ(X,Y,Z,W));
430 tctx->emit_instruction(tctx, &new_inst);
431
432 /* SUB tmpB, imm{1.0}, src0 */
433 new_inst = tgsi_default_full_instruction();
434 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
435 new_inst.Instruction.NumDstRegs = 1;
436 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
437 new_inst.Instruction.NumSrcRegs = 2;
438 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,Y,Y,Y));
439 reg_src(&new_inst.Src[1], src0, SWIZ(X,Y,Z,W));
440 tctx->emit_instruction(tctx, &new_inst);
441
442 /* MUL tmpB, tmpB, src2 */
443 new_inst = tgsi_default_full_instruction();
444 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
445 new_inst.Instruction.NumDstRegs = 1;
446 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
447 new_inst.Instruction.NumSrcRegs = 2;
448 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
449 reg_src(&new_inst.Src[1], src2, SWIZ(X,Y,Z,W));
450 tctx->emit_instruction(tctx, &new_inst);
451
452 /* ADD dst, tmpA, tmpB */
453 new_inst = tgsi_default_full_instruction();
454 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
455 new_inst.Instruction.NumDstRegs = 1;
456 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
457 new_inst.Instruction.NumSrcRegs = 2;
458 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
459 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
460 tctx->emit_instruction(tctx, &new_inst);
461 }
462 }
463
464 /* FRC - Fraction
465 * dst.x = src.x - \lfloor src.x\rfloor
466 * dst.y = src.y - \lfloor src.y\rfloor
467 * dst.z = src.z - \lfloor src.z\rfloor
468 * dst.w = src.w - \lfloor src.w\rfloor
469 *
470 * ; needs: 1 tmp
471 * FLR tmpA, src
472 * SUB dst, src, tmpA
473 */
474 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
475 #define FRC_TMP 1
476 static void
477 transform_frc(struct tgsi_transform_context *tctx,
478 struct tgsi_full_instruction *inst)
479 {
480 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
481 struct tgsi_full_dst_register *dst = &inst->Dst[0];
482 struct tgsi_full_src_register *src = &inst->Src[0];
483 struct tgsi_full_instruction new_inst;
484
485 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
486 /* FLR tmpA, src */
487 new_inst = tgsi_default_full_instruction();
488 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
489 new_inst.Instruction.NumDstRegs = 1;
490 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
491 new_inst.Instruction.NumSrcRegs = 1;
492 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
493 tctx->emit_instruction(tctx, &new_inst);
494
495 /* SUB dst, src, tmpA */
496 new_inst = tgsi_default_full_instruction();
497 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
498 new_inst.Instruction.NumDstRegs = 1;
499 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
500 new_inst.Instruction.NumSrcRegs = 2;
501 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
502 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
503 tctx->emit_instruction(tctx, &new_inst);
504 }
505 }
506
507 /* POW - Power
508 * dst.x = src0.x^{src1.x}
509 * dst.y = src0.x^{src1.x}
510 * dst.z = src0.x^{src1.x}
511 * dst.w = src0.x^{src1.x}
512 *
513 * ; needs: 1 tmp
514 * LG2 tmpA.x, src0.x
515 * MUL tmpA.x, src1.x, tmpA.x
516 * EX2 dst, tmpA.x
517 */
518 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
519 #define POW_TMP 1
520 static void
521 transform_pow(struct tgsi_transform_context *tctx,
522 struct tgsi_full_instruction *inst)
523 {
524 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
525 struct tgsi_full_dst_register *dst = &inst->Dst[0];
526 struct tgsi_full_src_register *src0 = &inst->Src[0];
527 struct tgsi_full_src_register *src1 = &inst->Src[1];
528 struct tgsi_full_instruction new_inst;
529
530 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
531 /* LG2 tmpA.x, src0.x */
532 new_inst = tgsi_default_full_instruction();
533 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
534 new_inst.Instruction.NumDstRegs = 1;
535 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
536 new_inst.Instruction.NumSrcRegs = 1;
537 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
538 tctx->emit_instruction(tctx, &new_inst);
539
540 /* MUL tmpA.x, src1.x, tmpA.x */
541 new_inst = tgsi_default_full_instruction();
542 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
543 new_inst.Instruction.NumDstRegs = 1;
544 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
545 new_inst.Instruction.NumSrcRegs = 2;
546 reg_src(&new_inst.Src[0], src1, SWIZ(X,_,_,_));
547 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,_,_,_));
548 tctx->emit_instruction(tctx, &new_inst);
549
550 /* EX2 dst, tmpA.x */
551 new_inst = tgsi_default_full_instruction();
552 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
553 new_inst.Instruction.NumDstRegs = 1;
554 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
555 new_inst.Instruction.NumSrcRegs = 1;
556 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
557 tctx->emit_instruction(tctx, &new_inst);
558 }
559 }
560
561 /* LIT - Light Coefficients
562 * dst.x = 1.0
563 * dst.y = max(src.x, 0.0)
564 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
565 * dst.w = 1.0
566 *
567 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
568 * MAX tmpA.xy, src.xy, imm{0.0}
569 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
570 * LG2 tmpA.y, tmpA.y
571 * MUL tmpA.y, tmpA.z, tmpA.y
572 * EX2 tmpA.y, tmpA.y
573 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
574 * MOV dst.yz, tmpA.xy
575 * MOV dst.xw, imm{1.0}
576 */
577 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
578 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
579 #define LIT_TMP 1
580 static void
581 transform_lit(struct tgsi_transform_context *tctx,
582 struct tgsi_full_instruction *inst)
583 {
584 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
585 struct tgsi_full_dst_register *dst = &inst->Dst[0];
586 struct tgsi_full_src_register *src = &inst->Src[0];
587 struct tgsi_full_instruction new_inst;
588
589 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
590 /* MAX tmpA.xy, src.xy, imm{0.0} */
591 new_inst = tgsi_default_full_instruction();
592 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
593 new_inst.Instruction.NumDstRegs = 1;
594 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
595 new_inst.Instruction.NumSrcRegs = 2;
596 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,_,_));
597 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X,X,_,_));
598 tctx->emit_instruction(tctx, &new_inst);
599
600 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
601 new_inst = tgsi_default_full_instruction();
602 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
603 new_inst.Instruction.NumDstRegs = 1;
604 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
605 new_inst.Instruction.NumSrcRegs = 3;
606 reg_src(&new_inst.Src[0], src, SWIZ(_,_,W,_));
607 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_,_,Z,_));
608 new_inst.Src[1].Register.Negate = true;
609 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,_,Z,_));
610 tctx->emit_instruction(tctx, &new_inst);
611
612 /* LG2 tmpA.y, tmpA.y */
613 new_inst = tgsi_default_full_instruction();
614 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
615 new_inst.Instruction.NumDstRegs = 1;
616 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
617 new_inst.Instruction.NumSrcRegs = 1;
618 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
619 tctx->emit_instruction(tctx, &new_inst);
620
621 /* MUL tmpA.y, tmpA.z, tmpA.y */
622 new_inst = tgsi_default_full_instruction();
623 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
624 new_inst.Instruction.NumDstRegs = 1;
625 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
626 new_inst.Instruction.NumSrcRegs = 2;
627 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
628 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
629 tctx->emit_instruction(tctx, &new_inst);
630
631 /* EX2 tmpA.y, tmpA.y */
632 new_inst = tgsi_default_full_instruction();
633 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
634 new_inst.Instruction.NumDstRegs = 1;
635 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
636 new_inst.Instruction.NumSrcRegs = 1;
637 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
638 tctx->emit_instruction(tctx, &new_inst);
639
640 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
641 new_inst = tgsi_default_full_instruction();
642 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
643 new_inst.Instruction.NumDstRegs = 1;
644 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
645 new_inst.Instruction.NumSrcRegs = 3;
646 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
647 new_inst.Src[0].Register.Negate = true;
648 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
649 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,X,_,_));
650 tctx->emit_instruction(tctx, &new_inst);
651
652 /* MOV dst.yz, tmpA.xy */
653 new_inst = tgsi_default_full_instruction();
654 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
655 new_inst.Instruction.NumDstRegs = 1;
656 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
657 new_inst.Instruction.NumSrcRegs = 1;
658 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,Y,_));
659 tctx->emit_instruction(tctx, &new_inst);
660 }
661
662 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
663 /* MOV dst.xw, imm{1.0} */
664 new_inst = tgsi_default_full_instruction();
665 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
666 new_inst.Instruction.NumDstRegs = 1;
667 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
668 new_inst.Instruction.NumSrcRegs = 1;
669 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,Y));
670 tctx->emit_instruction(tctx, &new_inst);
671 }
672 }
673
674 /* EXP - Approximate Exponential Base 2
675 * dst.x = 2^{\lfloor src.x\rfloor}
676 * dst.y = src.x - \lfloor src.x\rfloor
677 * dst.z = 2^{src.x}
678 * dst.w = 1.0
679 *
680 * ; needs: 1 tmp, imm{1.0}
681 * FLR tmpA.x, src.x
682 * EX2 tmpA.y, src.x
683 * SUB dst.y, src.x, tmpA.x
684 * EX2 dst.x, tmpA.x
685 * MOV dst.z, tmpA.y
686 * MOV dst.w, imm{1.0}
687 */
688 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
689 NINST(1)+ NINST(1) - OINST(1))
690 #define EXP_TMP 1
691 static void
692 transform_exp(struct tgsi_transform_context *tctx,
693 struct tgsi_full_instruction *inst)
694 {
695 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
696 struct tgsi_full_dst_register *dst = &inst->Dst[0];
697 struct tgsi_full_src_register *src = &inst->Src[0];
698 struct tgsi_full_instruction new_inst;
699
700 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
701 /* FLR tmpA.x, src.x */
702 new_inst = tgsi_default_full_instruction();
703 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
704 new_inst.Instruction.NumDstRegs = 1;
705 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
706 new_inst.Instruction.NumSrcRegs = 1;
707 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
708 tctx->emit_instruction(tctx, &new_inst);
709 }
710
711 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
712 /* EX2 tmpA.y, src.x */
713 new_inst = tgsi_default_full_instruction();
714 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
715 new_inst.Instruction.NumDstRegs = 1;
716 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
717 new_inst.Instruction.NumSrcRegs = 1;
718 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
719 tctx->emit_instruction(tctx, &new_inst);
720 }
721
722 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
723 /* SUB dst.y, src.x, tmpA.x */
724 new_inst = tgsi_default_full_instruction();
725 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
726 new_inst.Instruction.NumDstRegs = 1;
727 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
728 new_inst.Instruction.NumSrcRegs = 2;
729 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
730 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,X,_,_));
731 tctx->emit_instruction(tctx, &new_inst);
732 }
733
734 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
735 /* EX2 dst.x, tmpA.x */
736 new_inst = tgsi_default_full_instruction();
737 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
738 new_inst.Instruction.NumDstRegs = 1;
739 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
740 new_inst.Instruction.NumSrcRegs = 1;
741 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
742 tctx->emit_instruction(tctx, &new_inst);
743 }
744
745 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
746 /* MOV dst.z, tmpA.y */
747 new_inst = tgsi_default_full_instruction();
748 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
749 new_inst.Instruction.NumDstRegs = 1;
750 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
751 new_inst.Instruction.NumSrcRegs = 1;
752 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,_,Y,_));
753 tctx->emit_instruction(tctx, &new_inst);
754 }
755
756 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
757 /* MOV dst.w, imm{1.0} */
758 new_inst = tgsi_default_full_instruction();
759 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
760 new_inst.Instruction.NumDstRegs = 1;
761 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
762 new_inst.Instruction.NumSrcRegs = 1;
763 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
764 tctx->emit_instruction(tctx, &new_inst);
765 }
766 }
767
768 /* LOG - Approximate Logarithm Base 2
769 * dst.x = \lfloor\log_2{|src.x|}\rfloor
770 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
771 * dst.z = \log_2{|src.x|}
772 * dst.w = 1.0
773 *
774 * ; needs: 1 tmp, imm{1.0}
775 * LG2 tmpA.x, |src.x|
776 * FLR tmpA.y, tmpA.x
777 * EX2 tmpA.z, tmpA.y
778 * RCP tmpA.z, tmpA.z
779 * MUL dst.y, |src.x|, tmpA.z
780 * MOV dst.xz, tmpA.yx
781 * MOV dst.w, imm{1.0}
782 */
783 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
784 NINST(2) + NINST(1) + NINST(1) - OINST(1))
785 #define LOG_TMP 1
786 static void
787 transform_log(struct tgsi_transform_context *tctx,
788 struct tgsi_full_instruction *inst)
789 {
790 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
791 struct tgsi_full_dst_register *dst = &inst->Dst[0];
792 struct tgsi_full_src_register *src = &inst->Src[0];
793 struct tgsi_full_instruction new_inst;
794
795 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
796 /* LG2 tmpA.x, |src.x| */
797 new_inst = tgsi_default_full_instruction();
798 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
799 new_inst.Instruction.NumDstRegs = 1;
800 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
801 new_inst.Instruction.NumSrcRegs = 1;
802 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
803 new_inst.Src[0].Register.Absolute = true;
804 tctx->emit_instruction(tctx, &new_inst);
805 }
806
807 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
808 /* FLR tmpA.y, tmpA.x */
809 new_inst = tgsi_default_full_instruction();
810 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
811 new_inst.Instruction.NumDstRegs = 1;
812 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
813 new_inst.Instruction.NumSrcRegs = 1;
814 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,_,_));
815 tctx->emit_instruction(tctx, &new_inst);
816 }
817
818 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
819 /* EX2 tmpA.z, tmpA.y */
820 new_inst = tgsi_default_full_instruction();
821 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
822 new_inst.Instruction.NumDstRegs = 1;
823 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
824 new_inst.Instruction.NumSrcRegs = 1;
825 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
826 tctx->emit_instruction(tctx, &new_inst);
827
828 /* RCP tmpA.z, tmpA.z */
829 new_inst = tgsi_default_full_instruction();
830 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
831 new_inst.Instruction.NumDstRegs = 1;
832 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
833 new_inst.Instruction.NumSrcRegs = 1;
834 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z,_,_,_));
835 tctx->emit_instruction(tctx, &new_inst);
836
837 /* MUL dst.y, |src.x|, tmpA.z */
838 new_inst = tgsi_default_full_instruction();
839 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
840 new_inst.Instruction.NumDstRegs = 1;
841 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
842 new_inst.Instruction.NumSrcRegs = 2;
843 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
844 new_inst.Src[0].Register.Absolute = true;
845 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
846 tctx->emit_instruction(tctx, &new_inst);
847 }
848
849 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
850 /* MOV dst.xz, tmpA.yx */
851 new_inst = tgsi_default_full_instruction();
852 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
853 new_inst.Instruction.NumDstRegs = 1;
854 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
855 new_inst.Instruction.NumSrcRegs = 1;
856 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,X,_));
857 tctx->emit_instruction(tctx, &new_inst);
858 }
859
860 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
861 /* MOV dst.w, imm{1.0} */
862 new_inst = tgsi_default_full_instruction();
863 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
864 new_inst.Instruction.NumDstRegs = 1;
865 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
866 new_inst.Instruction.NumSrcRegs = 1;
867 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
868 tctx->emit_instruction(tctx, &new_inst);
869 }
870 }
871
872 /* DP4 - 4-component Dot Product
873 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
874 *
875 * DP3 - 3-component Dot Product
876 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
877 *
878 * DPH - Homogeneous Dot Product
879 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
880 *
881 * DP2 - 2-component Dot Product
882 * dst = src0.x \times src1.x + src0.y \times src1.y
883 *
884 * DP2A - 2-component Dot Product And Add
885 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
886 *
887 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
888 * operations, which is what you'd prefer for a ISA that is natively
889 * scalar. Probably a native vector ISA would at least already have
890 * DP4/DP3 instructions, but perhaps there is room for an alternative
891 * translation for DPH/DP2/DP2A using vector instructions.
892 *
893 * ; needs: 1 tmp
894 * MUL tmpA.x, src0.x, src1.x
895 * MAD tmpA.x, src0.y, src1.y, tmpA.x
896 * if (DPH || DP3 || DP4) {
897 * MAD tmpA.x, src0.z, src1.z, tmpA.x
898 * if (DPH) {
899 * ADD tmpA.x, src1.w, tmpA.x
900 * } else if (DP4) {
901 * MAD tmpA.x, src0.w, src1.w, tmpA.x
902 * }
903 * } else if (DP2A) {
904 * ADD tmpA.x, src2.x, tmpA.x
905 * }
906 * ; fixup last instruction to replicate into dst
907 */
908 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
909 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
910 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
911 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
912 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
913 #define DOTP_TMP 1
914 static void
915 transform_dotp(struct tgsi_transform_context *tctx,
916 struct tgsi_full_instruction *inst)
917 {
918 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
919 struct tgsi_full_dst_register *dst = &inst->Dst[0];
920 struct tgsi_full_src_register *src0 = &inst->Src[0];
921 struct tgsi_full_src_register *src1 = &inst->Src[1];
922 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
923 struct tgsi_full_instruction new_inst;
924 unsigned opcode = inst->Instruction.Opcode;
925
926 /* NOTE: any potential last instruction must replicate src on all
927 * components (since it could be re-written to write to final dst)
928 */
929
930 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
931 /* MUL tmpA.x, src0.x, src1.x */
932 new_inst = tgsi_default_full_instruction();
933 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
934 new_inst.Instruction.NumDstRegs = 1;
935 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
936 new_inst.Instruction.NumSrcRegs = 2;
937 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
938 reg_src(&new_inst.Src[1], src1, SWIZ(X,_,_,_));
939 tctx->emit_instruction(tctx, &new_inst);
940
941 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
942 new_inst = tgsi_default_full_instruction();
943 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
944 new_inst.Instruction.NumDstRegs = 1;
945 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
946 new_inst.Instruction.NumSrcRegs = 3;
947 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Y,Y,Y));
948 reg_src(&new_inst.Src[1], src1, SWIZ(Y,Y,Y,Y));
949 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
950
951 if ((opcode == TGSI_OPCODE_DPH) ||
952 (opcode == TGSI_OPCODE_DP3) ||
953 (opcode == TGSI_OPCODE_DP4)) {
954 tctx->emit_instruction(tctx, &new_inst);
955
956 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
957 new_inst = tgsi_default_full_instruction();
958 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
959 new_inst.Instruction.NumDstRegs = 1;
960 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
961 new_inst.Instruction.NumSrcRegs = 3;
962 reg_src(&new_inst.Src[0], src0, SWIZ(Z,Z,Z,Z));
963 reg_src(&new_inst.Src[1], src1, SWIZ(Z,Z,Z,Z));
964 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
965
966 if (opcode == TGSI_OPCODE_DPH) {
967 tctx->emit_instruction(tctx, &new_inst);
968
969 /* ADD tmpA.x, src1.w, tmpA.x */
970 new_inst = tgsi_default_full_instruction();
971 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
972 new_inst.Instruction.NumDstRegs = 1;
973 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
974 new_inst.Instruction.NumSrcRegs = 2;
975 reg_src(&new_inst.Src[0], src1, SWIZ(W,W,W,W));
976 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
977 } else if (opcode == TGSI_OPCODE_DP4) {
978 tctx->emit_instruction(tctx, &new_inst);
979
980 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
981 new_inst = tgsi_default_full_instruction();
982 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
983 new_inst.Instruction.NumDstRegs = 1;
984 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
985 new_inst.Instruction.NumSrcRegs = 3;
986 reg_src(&new_inst.Src[0], src0, SWIZ(W,W,W,W));
987 reg_src(&new_inst.Src[1], src1, SWIZ(W,W,W,W));
988 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
989 }
990 } else if (opcode == TGSI_OPCODE_DP2A) {
991 tctx->emit_instruction(tctx, &new_inst);
992
993 /* ADD tmpA.x, src2.x, tmpA.x */
994 new_inst = tgsi_default_full_instruction();
995 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
996 new_inst.Instruction.NumDstRegs = 1;
997 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
998 new_inst.Instruction.NumSrcRegs = 2;
999 reg_src(&new_inst.Src[0], src2, SWIZ(X,X,X,X));
1000 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
1001 }
1002
1003 /* fixup last instruction to write to dst: */
1004 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005
1006 tctx->emit_instruction(tctx, &new_inst);
1007 }
1008 }
1009
1010 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1011 * in the case of TXP, the clamping must happen *after* projection, so
1012 * we need to lower TXP to TEX.
1013 *
1014 * MOV tmpA, src0
1015 * if (opc == TXP) {
1016 * ; do perspective division manually before clamping:
1017 * RCP tmpB, tmpA.w
1018 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1019 * opc = TEX;
1020 * }
1021 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1022 * <opc> dst, tmpA, ...
1023 */
1024 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1025 #define SAMP_TMP 2
1026 static int
1027 transform_samp(struct tgsi_transform_context *tctx,
1028 struct tgsi_full_instruction *inst)
1029 {
1030 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1031 struct tgsi_full_src_register *coord = &inst->Src[0];
1032 struct tgsi_full_src_register *samp;
1033 struct tgsi_full_instruction new_inst;
1034 /* mask is clamped coords, pmask is all coords (for projection): */
1035 unsigned mask = 0, pmask = 0, smask;
1036 unsigned opcode = inst->Instruction.Opcode;
1037
1038 if (opcode == TGSI_OPCODE_TXB2) {
1039 samp = &inst->Src[2];
1040 } else {
1041 samp = &inst->Src[1];
1042 }
1043
1044 /* convert sampler # to bitmask to test: */
1045 smask = 1 << samp->Register.Index;
1046
1047 /* check if we actually need to lower this one: */
1048 if (!(ctx->saturate & smask))
1049 return -1;
1050
1051 /* figure out which coordinates need saturating:
1052 * - RECT textures should not get saturated
1053 * - array index coords should not get saturated
1054 */
1055 switch (inst->Texture.Texture) {
1056 case TGSI_TEXTURE_3D:
1057 case TGSI_TEXTURE_CUBE:
1058 case TGSI_TEXTURE_CUBE_ARRAY:
1059 case TGSI_TEXTURE_SHADOWCUBE:
1060 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1061 if (ctx->config->saturate_r & smask)
1062 mask |= TGSI_WRITEMASK_Z;
1063 pmask |= TGSI_WRITEMASK_Z;
1064 /* fallthrough */
1065
1066 case TGSI_TEXTURE_2D:
1067 case TGSI_TEXTURE_2D_ARRAY:
1068 case TGSI_TEXTURE_SHADOW2D:
1069 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1070 case TGSI_TEXTURE_2D_MSAA:
1071 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1072 if (ctx->config->saturate_t & smask)
1073 mask |= TGSI_WRITEMASK_Y;
1074 pmask |= TGSI_WRITEMASK_Y;
1075 /* fallthrough */
1076
1077 case TGSI_TEXTURE_1D:
1078 case TGSI_TEXTURE_1D_ARRAY:
1079 case TGSI_TEXTURE_SHADOW1D:
1080 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1081 if (ctx->config->saturate_s & smask)
1082 mask |= TGSI_WRITEMASK_X;
1083 pmask |= TGSI_WRITEMASK_X;
1084 break;
1085
1086 /* TODO: I think we should ignore these?
1087 case TGSI_TEXTURE_RECT:
1088 case TGSI_TEXTURE_SHADOWRECT:
1089 */
1090 }
1091
1092 /* sanity check.. driver could be asking to saturate a non-
1093 * existent coordinate component:
1094 */
1095 if (!mask)
1096 return -1;
1097
1098 /* MOV tmpA, src0 */
1099 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1100
1101 /* This is a bit sad.. we need to clamp *after* the coords
1102 * are projected, which means lowering TXP to TEX and doing
1103 * the projection ourself. But since I haven't figured out
1104 * how to make the lowering code deliver an electric shock
1105 * to anyone using GL_CLAMP, we must do this instead:
1106 */
1107 if (opcode == TGSI_OPCODE_TXP) {
1108 /* RCP tmpB.x tmpA.w */
1109 new_inst = tgsi_default_full_instruction();
1110 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1111 new_inst.Instruction.NumDstRegs = 1;
1112 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1113 new_inst.Instruction.NumSrcRegs = 1;
1114 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W,_,_,_));
1115 tctx->emit_instruction(tctx, &new_inst);
1116
1117 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1118 new_inst = tgsi_default_full_instruction();
1119 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1120 new_inst.Instruction.NumDstRegs = 1;
1121 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1122 new_inst.Instruction.NumSrcRegs = 2;
1123 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
1124 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,X,X,X));
1125 tctx->emit_instruction(tctx, &new_inst);
1126
1127 opcode = TGSI_OPCODE_TEX;
1128 }
1129
1130 /* MOV_SAT tmpA.<mask>, tmpA */
1131 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
1132 TGSI_SAT_ZERO_ONE);
1133
1134 /* modify the texture samp instruction to take fixed up coord: */
1135 new_inst = *inst;
1136 new_inst.Instruction.Opcode = opcode;
1137 new_inst.Src[0] = ctx->tmp[A].src;
1138 tctx->emit_instruction(tctx, &new_inst);
1139
1140 return 0;
1141 }
1142
1143 /* Two-sided color emulation:
1144 * For each COLOR input, create a corresponding BCOLOR input, plus
1145 * CMP instruction to select front or back color based on FACE
1146 */
1147 #define TWOSIDE_GROW(n) ( \
1148 2 + /* FACE */ \
1149 ((n) * 2) + /* IN[] BCOLOR[n] */ \
1150 ((n) * 1) + /* TEMP[] */ \
1151 ((n) * NINST(3)) /* CMP instr */ \
1152 )
1153
1154 static void
1155 emit_twoside(struct tgsi_transform_context *tctx)
1156 {
1157 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1158 struct tgsi_shader_info *info = ctx->info;
1159 struct tgsi_full_declaration decl;
1160 struct tgsi_full_instruction new_inst;
1161 unsigned inbase, tmpbase;
1162 int i;
1163
1164 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1165 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1166
1167 /* additional inputs for BCOLOR's */
1168 for (i = 0; i < ctx->two_side_colors; i++) {
1169 decl = tgsi_default_full_declaration();
1170 decl.Declaration.File = TGSI_FILE_INPUT;
1171 decl.Declaration.Semantic = true;
1172 decl.Range.First = decl.Range.Last = inbase + i;
1173 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1174 decl.Semantic.Index =
1175 info->input_semantic_index[ctx->two_side_idx[i]];
1176 tctx->emit_declaration(tctx, &decl);
1177 }
1178
1179 /* additional input for FACE */
1180 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1181 decl = tgsi_default_full_declaration();
1182 decl.Declaration.File = TGSI_FILE_INPUT;
1183 decl.Declaration.Semantic = true;
1184 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1185 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1186 decl.Semantic.Index = 0;
1187 tctx->emit_declaration(tctx, &decl);
1188
1189 ctx->face_idx = decl.Range.First;
1190 }
1191
1192 /* additional temps for COLOR/BCOLOR selection: */
1193 for (i = 0; i < ctx->two_side_colors; i++) {
1194 decl = tgsi_default_full_declaration();
1195 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1196 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1197 tctx->emit_declaration(tctx, &decl);
1198 }
1199
1200 /* and finally additional instructions to select COLOR/BCOLOR: */
1201 for (i = 0; i < ctx->two_side_colors; i++) {
1202 new_inst = tgsi_default_full_instruction();
1203 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1204
1205 new_inst.Instruction.NumDstRegs = 1;
1206 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1207 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1208 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1209
1210 new_inst.Instruction.NumSrcRegs = 3;
1211 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1212 new_inst.Src[0].Register.Index = ctx->face_idx;
1213 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1214 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1215 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1216 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1217 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1218 new_inst.Src[1].Register.Index = inbase + i;
1219 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1220 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1221 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1222 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1223 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1224 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1225 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1226 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1227 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1228 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1229
1230 tctx->emit_instruction(tctx, &new_inst);
1231 }
1232 }
1233
1234 static void
1235 emit_decls(struct tgsi_transform_context *tctx)
1236 {
1237 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1238 struct tgsi_shader_info *info = ctx->info;
1239 struct tgsi_full_declaration decl;
1240 struct tgsi_full_immediate immed;
1241 unsigned tmpbase;
1242 int i;
1243
1244 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1245
1246 ctx->color_base = tmpbase + ctx->numtmp;
1247
1248 /* declare immediate: */
1249 immed = tgsi_default_full_immediate();
1250 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1251 immed.u[0].Float = 0.0;
1252 immed.u[1].Float = 1.0;
1253 immed.u[2].Float = 128.0;
1254 immed.u[3].Float = 0.0;
1255 tctx->emit_immediate(tctx, &immed);
1256
1257 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1258 ctx->imm.Register.Index = info->immediate_count;
1259 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1260 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1261 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1262 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1263
1264 /* declare temp regs: */
1265 for (i = 0; i < ctx->numtmp; i++) {
1266 decl = tgsi_default_full_declaration();
1267 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1268 decl.Range.First = decl.Range.Last = tmpbase + i;
1269 tctx->emit_declaration(tctx, &decl);
1270
1271 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1272 ctx->tmp[i].src.Register.Index = tmpbase + i;
1273 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1274 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1275 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1276 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1277
1278 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1279 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1280 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1281 }
1282
1283 if (ctx->two_side_colors)
1284 emit_twoside(tctx);
1285 }
1286
1287 static void
1288 rename_color_inputs(struct fd_lowering_context *ctx,
1289 struct tgsi_full_instruction *inst)
1290 {
1291 unsigned i, j;
1292 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1293 struct tgsi_src_register *src = &inst->Src[i].Register;
1294 if (src->File == TGSI_FILE_INPUT) {
1295 for (j = 0; j < ctx->two_side_colors; j++) {
1296 if (src->Index == ctx->two_side_idx[j]) {
1297 src->File = TGSI_FILE_TEMPORARY;
1298 src->Index = ctx->color_base + j;
1299 break;
1300 }
1301 }
1302 }
1303 }
1304
1305 }
1306
1307 static void
1308 transform_instr(struct tgsi_transform_context *tctx,
1309 struct tgsi_full_instruction *inst)
1310 {
1311 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1312
1313 if (!ctx->emitted_decls) {
1314 emit_decls(tctx);
1315 ctx->emitted_decls = 1;
1316 }
1317
1318 /* if emulating two-sided-color, we need to re-write some
1319 * src registers:
1320 */
1321 if (ctx->two_side_colors)
1322 rename_color_inputs(ctx, inst);
1323
1324 switch (inst->Instruction.Opcode) {
1325 case TGSI_OPCODE_DST:
1326 if (!ctx->config->lower_DST)
1327 goto skip;
1328 transform_dst(tctx, inst);
1329 break;
1330 case TGSI_OPCODE_XPD:
1331 if (!ctx->config->lower_XPD)
1332 goto skip;
1333 transform_xpd(tctx, inst);
1334 break;
1335 case TGSI_OPCODE_SCS:
1336 if (!ctx->config->lower_SCS)
1337 goto skip;
1338 transform_scs(tctx, inst);
1339 break;
1340 case TGSI_OPCODE_LRP:
1341 if (!ctx->config->lower_LRP)
1342 goto skip;
1343 transform_lrp(tctx, inst);
1344 break;
1345 case TGSI_OPCODE_FRC:
1346 if (!ctx->config->lower_FRC)
1347 goto skip;
1348 transform_frc(tctx, inst);
1349 break;
1350 case TGSI_OPCODE_POW:
1351 if (!ctx->config->lower_POW)
1352 goto skip;
1353 transform_pow(tctx, inst);
1354 break;
1355 case TGSI_OPCODE_LIT:
1356 if (!ctx->config->lower_LIT)
1357 goto skip;
1358 transform_lit(tctx, inst);
1359 break;
1360 case TGSI_OPCODE_EXP:
1361 if (!ctx->config->lower_EXP)
1362 goto skip;
1363 transform_exp(tctx, inst);
1364 break;
1365 case TGSI_OPCODE_LOG:
1366 if (!ctx->config->lower_LOG)
1367 goto skip;
1368 transform_log(tctx, inst);
1369 break;
1370 case TGSI_OPCODE_DP4:
1371 if (!ctx->config->lower_DP4)
1372 goto skip;
1373 transform_dotp(tctx, inst);
1374 break;
1375 case TGSI_OPCODE_DP3:
1376 if (!ctx->config->lower_DP3)
1377 goto skip;
1378 transform_dotp(tctx, inst);
1379 break;
1380 case TGSI_OPCODE_DPH:
1381 if (!ctx->config->lower_DPH)
1382 goto skip;
1383 transform_dotp(tctx, inst);
1384 break;
1385 case TGSI_OPCODE_DP2:
1386 if (!ctx->config->lower_DP2)
1387 goto skip;
1388 transform_dotp(tctx, inst);
1389 break;
1390 case TGSI_OPCODE_DP2A:
1391 if (!ctx->config->lower_DP2A)
1392 goto skip;
1393 transform_dotp(tctx, inst);
1394 break;
1395 case TGSI_OPCODE_TEX:
1396 case TGSI_OPCODE_TXP:
1397 case TGSI_OPCODE_TXB:
1398 case TGSI_OPCODE_TXB2:
1399 case TGSI_OPCODE_TXL:
1400 if (transform_samp(tctx, inst))
1401 goto skip;
1402 break;
1403 default:
1404 skip:
1405 tctx->emit_instruction(tctx, inst);
1406 break;
1407 }
1408 }
1409
1410 /* returns NULL if no lowering required, else returns the new
1411 * tokens (which caller is required to free()). In either case
1412 * returns the current info.
1413 */
1414 const struct tgsi_token *
1415 fd_transform_lowering(const struct fd_lowering_config *config,
1416 const struct tgsi_token *tokens,
1417 struct tgsi_shader_info *info)
1418 {
1419 struct fd_lowering_context ctx;
1420 struct tgsi_token *newtoks;
1421 int newlen, numtmp;
1422
1423 /* sanity check in case limit is ever increased: */
1424 assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1425
1426 memset(&ctx, 0, sizeof(ctx));
1427 ctx.base.transform_instruction = transform_instr;
1428 ctx.info = info;
1429 ctx.config = config;
1430
1431 tgsi_scan_shader(tokens, info);
1432
1433 /* if we are adding fragment shader support to emulate two-sided
1434 * color, then figure out the number of additional inputs we need
1435 * to create for BCOLOR's..
1436 */
1437 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1438 config->color_two_side) {
1439 int i;
1440 ctx.face_idx = -1;
1441 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1442 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1443 ctx.two_side_idx[ctx.two_side_colors++] = i;
1444 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1445 ctx.face_idx = i;
1446 }
1447 }
1448
1449 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1450
1451 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1452 /* if there are no instructions to lower, then we are done: */
1453 if (!(OPCS(DST) ||
1454 OPCS(XPD) ||
1455 OPCS(SCS) ||
1456 OPCS(LRP) ||
1457 OPCS(FRC) ||
1458 OPCS(POW) ||
1459 OPCS(LIT) ||
1460 OPCS(EXP) ||
1461 OPCS(LOG) ||
1462 OPCS(DP4) ||
1463 OPCS(DP3) ||
1464 OPCS(DPH) ||
1465 OPCS(DP2) ||
1466 OPCS(DP2A) ||
1467 ctx.two_side_colors ||
1468 ctx.saturate))
1469 return NULL;
1470
1471 #if 0 /* debug */
1472 _debug_printf("BEFORE:");
1473 tgsi_dump(tokens, 0);
1474 #endif
1475
1476 numtmp = 0;
1477 newlen = tgsi_num_tokens(tokens);
1478 if (OPCS(DST)) {
1479 newlen += DST_GROW * OPCS(DST);
1480 numtmp = MAX2(numtmp, DST_TMP);
1481 }
1482 if (OPCS(XPD)) {
1483 newlen += XPD_GROW * OPCS(XPD);
1484 numtmp = MAX2(numtmp, XPD_TMP);
1485 }
1486 if (OPCS(SCS)) {
1487 newlen += SCS_GROW * OPCS(SCS);
1488 numtmp = MAX2(numtmp, SCS_TMP);
1489 }
1490 if (OPCS(LRP)) {
1491 newlen += LRP_GROW * OPCS(LRP);
1492 numtmp = MAX2(numtmp, LRP_TMP);
1493 }
1494 if (OPCS(FRC)) {
1495 newlen += FRC_GROW * OPCS(FRC);
1496 numtmp = MAX2(numtmp, FRC_TMP);
1497 }
1498 if (OPCS(POW)) {
1499 newlen += POW_GROW * OPCS(POW);
1500 numtmp = MAX2(numtmp, POW_TMP);
1501 }
1502 if (OPCS(LIT)) {
1503 newlen += LIT_GROW * OPCS(LIT);
1504 numtmp = MAX2(numtmp, LIT_TMP);
1505 }
1506 if (OPCS(EXP)) {
1507 newlen += EXP_GROW * OPCS(EXP);
1508 numtmp = MAX2(numtmp, EXP_TMP);
1509 }
1510 if (OPCS(LOG)) {
1511 newlen += LOG_GROW * OPCS(LOG);
1512 numtmp = MAX2(numtmp, LOG_TMP);
1513 }
1514 if (OPCS(DP4)) {
1515 newlen += DP4_GROW * OPCS(DP4);
1516 numtmp = MAX2(numtmp, DOTP_TMP);
1517 }
1518 if (OPCS(DP3)) {
1519 newlen += DP3_GROW * OPCS(DP3);
1520 numtmp = MAX2(numtmp, DOTP_TMP);
1521 }
1522 if (OPCS(DPH)) {
1523 newlen += DPH_GROW * OPCS(DPH);
1524 numtmp = MAX2(numtmp, DOTP_TMP);
1525 }
1526 if (OPCS(DP2)) {
1527 newlen += DP2_GROW * OPCS(DP2);
1528 numtmp = MAX2(numtmp, DOTP_TMP);
1529 }
1530 if (OPCS(DP2A)) {
1531 newlen += DP2A_GROW * OPCS(DP2A);
1532 numtmp = MAX2(numtmp, DOTP_TMP);
1533 }
1534 if (ctx.saturate) {
1535 int n = info->opcode_count[TGSI_OPCODE_TEX] +
1536 info->opcode_count[TGSI_OPCODE_TXP] +
1537 info->opcode_count[TGSI_OPCODE_TXB] +
1538 info->opcode_count[TGSI_OPCODE_TXB2] +
1539 info->opcode_count[TGSI_OPCODE_TXL];
1540 newlen += SAMP_GROW * n;
1541 numtmp = MAX2(numtmp, SAMP_TMP);
1542 }
1543
1544 /* specifically don't include two_side_colors temps in the count: */
1545 ctx.numtmp = numtmp;
1546
1547 if (ctx.two_side_colors) {
1548 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1549 /* note: we permanently consume temp regs, re-writing references
1550 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1551 * instruction that selects which varying to use):
1552 */
1553 numtmp += ctx.two_side_colors;
1554 }
1555
1556 newlen += 2 * numtmp;
1557 newlen += 5; /* immediate */
1558
1559 newtoks = tgsi_alloc_tokens(newlen);
1560 if (!newtoks)
1561 return NULL;
1562
1563 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1564
1565 tgsi_scan_shader(newtoks, info);
1566
1567 #if 0 /* debug */
1568 _debug_printf("AFTER:");
1569 tgsi_dump(newtoks, 0);
1570 #endif
1571
1572 return newtoks;
1573 }