gallium/ureg: Set the next shader stage from the shader info.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
1 /*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/u_debug.h"
32 #include "util/u_math.h"
33
34 #include "tgsi_lowering.h"
35
/*
 * State carried through one run of the lowering pass.
 *
 * 'base' must stay the first member: tgsi_lowering_context() converts a
 * tgsi_transform_context pointer to this type with a plain cast.
 */
struct tgsi_lowering_context {
   struct tgsi_transform_context base;
   /* lowering options; e.g. lower_FLR is consulted by the EXP/LOG lowerings */
   const struct tgsi_lowering_config *config;
   struct tgsi_shader_info *info;
   unsigned two_side_colors;
   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
   unsigned color_base;         /* base register for chosen COLOR/BCOLOR's */
   int face_idx;
   unsigned numtmp;
   /* scratch registers, each kept in both source and destination form */
   struct {
      struct tgsi_full_src_register src;
      struct tgsi_full_dst_register dst;
   } tmp[2];
   /* indices into tmp[] (tmpA / tmpB in the lowering pseudo-code) */
#define A 0
#define B 1
   /* immediate read by the lowerings; going by the comments at its use
    * sites .x is 0.0, .y is 1.0 and .z is 128.0 -- NOTE(review): the
    * place where it is declared is not visible here, confirm. */
   struct tgsi_full_src_register imm;
   int emitted_decls;
   unsigned saturate;
};
55
/* Downcast a transform context to the lowering context that embeds it
 * as its first member. */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lowering =
      (struct tgsi_lowering_context *)tctx;

   return lowering;
}
61
62 /*
63 * Utility helpers:
64 */
65
66 static void
67 reg_dst(struct tgsi_full_dst_register *dst,
68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
69 {
70 *dst = *orig_dst;
71 dst->Register.WriteMask &= wrmask;
72 assert(dst->Register.WriteMask);
73 }
74
75 static inline void
76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
77 {
78 swiz[0] = src->SwizzleX;
79 swiz[1] = src->SwizzleY;
80 swiz[2] = src->SwizzleZ;
81 swiz[3] = src->SwizzleW;
82 }
83
84 static void
85 reg_src(struct tgsi_full_src_register *src,
86 const struct tgsi_full_src_register *orig_src,
87 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
88 {
89 unsigned swiz[4];
90 get_swiz(swiz, &orig_src->Register);
91 *src = *orig_src;
92 src->Register.SwizzleX = swiz[sx];
93 src->Register.SwizzleY = swiz[sy];
94 src->Register.SwizzleZ = swiz[sz];
95 src->Register.SwizzleW = swiz[sw];
96 }
97
98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */
99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
100 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
101
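/*
 * Example of the hazard aliases() is used to detect, written for a
 * COS/SIN lowering (NOTE: that lowering is not in this part of the file,
 * so the pseudo-code below is illustrative only):
 *
 *   if (dst.x aliases src.x) {
 *     MOV tmpA.x, src.x
 *     src = tmpA
 *   }
 *   COS dst.x, src.x
 *   SIN dst.y, src.x
 *   MOV dst.zw, imm{0.0, 1.0}
 */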
111 static bool
112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
113 const struct tgsi_full_src_register *src, unsigned src_mask)
114 {
115 if ((dst->Register.File == src->Register.File) &&
116 (dst->Register.Index == src->Register.Index)) {
117 unsigned i, actual_mask = 0;
118 unsigned swiz[4];
119 get_swiz(swiz, &src->Register);
120 for (i = 0; i < 4; i++)
121 if (src_mask & (1 << i))
122 actual_mask |= (1 << swiz[i]);
123 if (actual_mask & dst_mask)
124 return true;
125 }
126 return false;
127 }
128
129 static void
130 create_mov(struct tgsi_transform_context *tctx,
131 const struct tgsi_full_dst_register *dst,
132 const struct tgsi_full_src_register *src,
133 unsigned mask, unsigned saturate)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.Saturate = saturate;
140 new_inst.Instruction.NumDstRegs = 1;
141 reg_dst(&new_inst.Dst[0], dst, mask);
142 new_inst.Instruction.NumSrcRegs = 1;
143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
144 tctx->emit_instruction(tctx, &new_inst);
145 }
146
147 /* to help calculate # of tgsi tokens for a lowering.. we assume
148 * the worst case, ie. removed instructions don't have ADDR[] or
149 * anything which increases the # of tokens per src/dst and the
150 * inserted instructions do.
151 *
152 * OINST() - old instruction
153 * 1 : instruction itself
154 * 1 : dst
155 * 1 * nargs : srcN
156 *
157 * NINST() - new instruction
158 * 1 : instruction itself
159 * 2 : dst
160 * 2 * nargs : srcN
161 */
162
/* removed instruction: 1 instruction token + 1 dst token + 1 per source */
#define OINST(nargs)  (2 + (nargs))
/* inserted instruction: 1 instruction token + 2 dst tokens + 2 per source */
#define NINST(nargs)  (3 + 2 * (nargs))
165
166 /*
167 * Lowering Translators:
168 */
169
170 /* DST - Distance Vector
171 * dst.x = 1.0
172 * dst.y = src0.y \times src1.y
173 * dst.z = src0.z
174 * dst.w = src1.w
175 *
176 * ; note: could be more clever and use just a single temp
177 * ; if I was clever enough to re-write the swizzles.
178 * ; needs: 2 tmp, imm{1.0}
179 * if (dst.y aliases src0.z) {
180 * MOV tmpA.yz, src0.yz
181 * src0 = tmpA
182 * }
183 * if (dst.yz aliases src1.w) {
184 * MOV tmpB.yw, src1.yw
185 * src1 = tmpB
186 * }
187 * MUL dst.y, src0.y, src1.y
188 * MOV dst.z, src0.z
189 * MOV dst.w, src1.w
190 * MOV dst.x, imm{1.0}
191 */
192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
193 NINST(1) + NINST(1) - OINST(2))
194 #define DST_TMP 2
195 static void
196 transform_dst(struct tgsi_transform_context *tctx,
197 struct tgsi_full_instruction *inst)
198 {
199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
200 struct tgsi_full_dst_register *dst = &inst->Dst[0];
201 struct tgsi_full_src_register *src0 = &inst->Src[0];
202 struct tgsi_full_src_register *src1 = &inst->Src[1];
203 struct tgsi_full_instruction new_inst;
204
205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
207 src0 = &ctx->tmp[A].src;
208 }
209
210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
212 src1 = &ctx->tmp[B].src;
213 }
214
215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
216 /* MUL dst.y, src0.y, src1.y */
217 new_inst = tgsi_default_full_instruction();
218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
219 new_inst.Instruction.NumDstRegs = 1;
220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
221 new_inst.Instruction.NumSrcRegs = 2;
222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
224 tctx->emit_instruction(tctx, &new_inst);
225 }
226
227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
228 /* MOV dst.z, src0.z */
229 new_inst = tgsi_default_full_instruction();
230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
231 new_inst.Instruction.NumDstRegs = 1;
232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
233 new_inst.Instruction.NumSrcRegs = 1;
234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
235 tctx->emit_instruction(tctx, &new_inst);
236 }
237
238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
239 /* MOV dst.w, src1.w */
240 new_inst = tgsi_default_full_instruction();
241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
242 new_inst.Instruction.NumDstRegs = 1;
243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
244 new_inst.Instruction.NumSrcRegs = 1;
245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
246 tctx->emit_instruction(tctx, &new_inst);
247 }
248
249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
250 /* MOV dst.x, imm{1.0} */
251 new_inst = tgsi_default_full_instruction();
252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
253 new_inst.Instruction.NumDstRegs = 1;
254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
255 new_inst.Instruction.NumSrcRegs = 1;
256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
257 tctx->emit_instruction(tctx, &new_inst);
258 }
259 }
260
261 /* LRP - Linear Interpolate
262 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
263 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
264 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
265 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
266 *
267 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
268 * can then become: src0 \times src1 - (src0 \times src2 - src2)
269 *
270 * ; needs: 1 tmp
271 * MAD tmpA, src0, src2, -src2
272 * MAD dst, src0, src1, -tmpA
273 */
274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
275 #define LRP_TMP 1
276 static void
277 transform_lrp(struct tgsi_transform_context *tctx,
278 struct tgsi_full_instruction *inst)
279 {
280 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
281 struct tgsi_full_dst_register *dst = &inst->Dst[0];
282 struct tgsi_full_src_register *src0 = &inst->Src[0];
283 struct tgsi_full_src_register *src1 = &inst->Src[1];
284 struct tgsi_full_src_register *src2 = &inst->Src[2];
285 struct tgsi_full_instruction new_inst;
286
287 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
288 /* MAD tmpA, src0, src2, -src2 */
289 new_inst = tgsi_default_full_instruction();
290 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
291 new_inst.Instruction.NumDstRegs = 1;
292 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
293 new_inst.Instruction.NumSrcRegs = 3;
294 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
295 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
296 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
297 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
298 tctx->emit_instruction(tctx, &new_inst);
299
300 /* MAD dst, src0, src1, -tmpA */
301 new_inst = tgsi_default_full_instruction();
302 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
303 new_inst.Instruction.NumDstRegs = 1;
304 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
305 new_inst.Instruction.NumSrcRegs = 3;
306 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
307 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
308 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
309 new_inst.Src[2].Register.Negate = true;
310 tctx->emit_instruction(tctx, &new_inst);
311 }
312 }
313
314 /* FRC - Fraction
315 * dst.x = src.x - \lfloor src.x\rfloor
316 * dst.y = src.y - \lfloor src.y\rfloor
317 * dst.z = src.z - \lfloor src.z\rfloor
318 * dst.w = src.w - \lfloor src.w\rfloor
319 *
320 * ; needs: 1 tmp
321 * FLR tmpA, src
322 * SUB dst, src, tmpA
323 */
324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
325 #define FRC_TMP 1
326 static void
327 transform_frc(struct tgsi_transform_context *tctx,
328 struct tgsi_full_instruction *inst)
329 {
330 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
331 struct tgsi_full_dst_register *dst = &inst->Dst[0];
332 struct tgsi_full_src_register *src = &inst->Src[0];
333 struct tgsi_full_instruction new_inst;
334
335 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
336 /* FLR tmpA, src */
337 new_inst = tgsi_default_full_instruction();
338 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
339 new_inst.Instruction.NumDstRegs = 1;
340 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
341 new_inst.Instruction.NumSrcRegs = 1;
342 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
343 tctx->emit_instruction(tctx, &new_inst);
344
345 /* SUB dst, src, tmpA */
346 new_inst = tgsi_default_full_instruction();
347 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
348 new_inst.Instruction.NumDstRegs = 1;
349 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
350 new_inst.Instruction.NumSrcRegs = 2;
351 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
352 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
353 new_inst.Src[1].Register.Negate = 1;
354 tctx->emit_instruction(tctx, &new_inst);
355 }
356 }
357
358 /* POW - Power
359 * dst.x = src0.x^{src1.x}
360 * dst.y = src0.x^{src1.x}
361 * dst.z = src0.x^{src1.x}
362 * dst.w = src0.x^{src1.x}
363 *
364 * ; needs: 1 tmp
365 * LG2 tmpA.x, src0.x
366 * MUL tmpA.x, src1.x, tmpA.x
367 * EX2 dst, tmpA.x
368 */
369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
370 #define POW_TMP 1
371 static void
372 transform_pow(struct tgsi_transform_context *tctx,
373 struct tgsi_full_instruction *inst)
374 {
375 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
376 struct tgsi_full_dst_register *dst = &inst->Dst[0];
377 struct tgsi_full_src_register *src0 = &inst->Src[0];
378 struct tgsi_full_src_register *src1 = &inst->Src[1];
379 struct tgsi_full_instruction new_inst;
380
381 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
382 /* LG2 tmpA.x, src0.x */
383 new_inst = tgsi_default_full_instruction();
384 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
385 new_inst.Instruction.NumDstRegs = 1;
386 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
387 new_inst.Instruction.NumSrcRegs = 1;
388 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
389 tctx->emit_instruction(tctx, &new_inst);
390
391 /* MUL tmpA.x, src1.x, tmpA.x */
392 new_inst = tgsi_default_full_instruction();
393 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
394 new_inst.Instruction.NumDstRegs = 1;
395 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
396 new_inst.Instruction.NumSrcRegs = 2;
397 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
398 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
399 tctx->emit_instruction(tctx, &new_inst);
400
401 /* EX2 dst, tmpA.x */
402 new_inst = tgsi_default_full_instruction();
403 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
404 new_inst.Instruction.NumDstRegs = 1;
405 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
406 new_inst.Instruction.NumSrcRegs = 1;
407 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
408 tctx->emit_instruction(tctx, &new_inst);
409 }
410 }
411
412 /* LIT - Light Coefficients
413 * dst.x = 1.0
414 * dst.y = max(src.x, 0.0)
415 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
416 * dst.w = 1.0
417 *
418 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
419 * MAX tmpA.xy, src.xy, imm{0.0}
420 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
421 * LG2 tmpA.y, tmpA.y
422 * MUL tmpA.y, tmpA.z, tmpA.y
423 * EX2 tmpA.y, tmpA.y
424 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
425 * MOV dst.yz, tmpA.xy
426 * MOV dst.xw, imm{1.0}
427 */
428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
429 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
430 #define LIT_TMP 1
431 static void
432 transform_lit(struct tgsi_transform_context *tctx,
433 struct tgsi_full_instruction *inst)
434 {
435 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
436 struct tgsi_full_dst_register *dst = &inst->Dst[0];
437 struct tgsi_full_src_register *src = &inst->Src[0];
438 struct tgsi_full_instruction new_inst;
439
440 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
441 /* MAX tmpA.xy, src.xy, imm{0.0} */
442 new_inst = tgsi_default_full_instruction();
443 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
444 new_inst.Instruction.NumDstRegs = 1;
445 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
446 new_inst.Instruction.NumSrcRegs = 2;
447 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
448 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
449 tctx->emit_instruction(tctx, &new_inst);
450
451 /* MIN tmpA.z, src.w, imm{128.0} */
452 new_inst = tgsi_default_full_instruction();
453 new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
454 new_inst.Instruction.NumDstRegs = 1;
455 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
456 new_inst.Instruction.NumSrcRegs = 2;
457 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
458 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
459 tctx->emit_instruction(tctx, &new_inst);
460
461 /* MAX tmpA.z, tmpA.z, -imm{128.0} */
462 new_inst = tgsi_default_full_instruction();
463 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
464 new_inst.Instruction.NumDstRegs = 1;
465 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
466 new_inst.Instruction.NumSrcRegs = 2;
467 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
468 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
469 new_inst.Src[1].Register.Negate = true;
470 tctx->emit_instruction(tctx, &new_inst);
471
472 /* LG2 tmpA.y, tmpA.y */
473 new_inst = tgsi_default_full_instruction();
474 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
475 new_inst.Instruction.NumDstRegs = 1;
476 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
477 new_inst.Instruction.NumSrcRegs = 1;
478 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
479 tctx->emit_instruction(tctx, &new_inst);
480
481 /* MUL tmpA.y, tmpA.z, tmpA.y */
482 new_inst = tgsi_default_full_instruction();
483 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
484 new_inst.Instruction.NumDstRegs = 1;
485 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
486 new_inst.Instruction.NumSrcRegs = 2;
487 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
488 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
489 tctx->emit_instruction(tctx, &new_inst);
490
491 /* EX2 tmpA.y, tmpA.y */
492 new_inst = tgsi_default_full_instruction();
493 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
494 new_inst.Instruction.NumDstRegs = 1;
495 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
496 new_inst.Instruction.NumSrcRegs = 1;
497 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
498 tctx->emit_instruction(tctx, &new_inst);
499
500 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
501 new_inst = tgsi_default_full_instruction();
502 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
503 new_inst.Instruction.NumDstRegs = 1;
504 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
505 new_inst.Instruction.NumSrcRegs = 3;
506 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
507 new_inst.Src[0].Register.Negate = true;
508 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
509 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
510 tctx->emit_instruction(tctx, &new_inst);
511
512 /* MOV dst.yz, tmpA.xy */
513 new_inst = tgsi_default_full_instruction();
514 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
515 new_inst.Instruction.NumDstRegs = 1;
516 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
517 new_inst.Instruction.NumSrcRegs = 1;
518 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
519 tctx->emit_instruction(tctx, &new_inst);
520 }
521
522 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
523 /* MOV dst.xw, imm{1.0} */
524 new_inst = tgsi_default_full_instruction();
525 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
526 new_inst.Instruction.NumDstRegs = 1;
527 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
528 new_inst.Instruction.NumSrcRegs = 1;
529 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
530 tctx->emit_instruction(tctx, &new_inst);
531 }
532 }
533
534 /* EXP - Approximate Exponential Base 2
535 * dst.x = 2^{\lfloor src.x\rfloor}
536 * dst.y = src.x - \lfloor src.x\rfloor
537 * dst.z = 2^{src.x}
538 * dst.w = 1.0
539 *
540 * ; needs: 1 tmp, imm{1.0}
541 * if (lowering FLR) {
542 * FRC tmpA.x, src.x
543 * SUB tmpA.x, src.x, tmpA.x
544 * } else {
545 * FLR tmpA.x, src.x
546 * }
547 * EX2 tmpA.y, src.x
548 * SUB dst.y, src.x, tmpA.x
549 * EX2 dst.x, tmpA.x
550 * MOV dst.z, tmpA.y
551 * MOV dst.w, imm{1.0}
552 */
553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
554 NINST(1)+ NINST(1) - OINST(1))
555 #define EXP_TMP 1
556 static void
557 transform_exp(struct tgsi_transform_context *tctx,
558 struct tgsi_full_instruction *inst)
559 {
560 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
561 struct tgsi_full_dst_register *dst = &inst->Dst[0];
562 struct tgsi_full_src_register *src = &inst->Src[0];
563 struct tgsi_full_instruction new_inst;
564
565 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
566 if (ctx->config->lower_FLR) {
567 /* FRC tmpA.x, src.x */
568 new_inst = tgsi_default_full_instruction();
569 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
570 new_inst.Instruction.NumDstRegs = 1;
571 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
572 new_inst.Instruction.NumSrcRegs = 1;
573 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
574 tctx->emit_instruction(tctx, &new_inst);
575
576 /* SUB tmpA.x, src.x, tmpA.x */
577 new_inst = tgsi_default_full_instruction();
578 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
579 new_inst.Instruction.NumDstRegs = 1;
580 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
581 new_inst.Instruction.NumSrcRegs = 2;
582 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
583 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
584 new_inst.Src[1].Register.Negate = 1;
585 tctx->emit_instruction(tctx, &new_inst);
586 } else {
587 /* FLR tmpA.x, src.x */
588 new_inst = tgsi_default_full_instruction();
589 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
590 new_inst.Instruction.NumDstRegs = 1;
591 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
592 new_inst.Instruction.NumSrcRegs = 1;
593 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
594 tctx->emit_instruction(tctx, &new_inst);
595 }
596 }
597
598 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
599 /* EX2 tmpA.y, src.x */
600 new_inst = tgsi_default_full_instruction();
601 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
602 new_inst.Instruction.NumDstRegs = 1;
603 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
604 new_inst.Instruction.NumSrcRegs = 1;
605 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
606 tctx->emit_instruction(tctx, &new_inst);
607 }
608
609 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
610 /* SUB dst.y, src.x, tmpA.x */
611 new_inst = tgsi_default_full_instruction();
612 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
613 new_inst.Instruction.NumDstRegs = 1;
614 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
615 new_inst.Instruction.NumSrcRegs = 2;
616 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
617 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
618 new_inst.Src[1].Register.Negate = 1;
619 tctx->emit_instruction(tctx, &new_inst);
620 }
621
622 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
623 /* EX2 dst.x, tmpA.x */
624 new_inst = tgsi_default_full_instruction();
625 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
626 new_inst.Instruction.NumDstRegs = 1;
627 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
628 new_inst.Instruction.NumSrcRegs = 1;
629 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
630 tctx->emit_instruction(tctx, &new_inst);
631 }
632
633 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
634 /* MOV dst.z, tmpA.y */
635 new_inst = tgsi_default_full_instruction();
636 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
637 new_inst.Instruction.NumDstRegs = 1;
638 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
639 new_inst.Instruction.NumSrcRegs = 1;
640 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
641 tctx->emit_instruction(tctx, &new_inst);
642 }
643
644 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
645 /* MOV dst.w, imm{1.0} */
646 new_inst = tgsi_default_full_instruction();
647 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
648 new_inst.Instruction.NumDstRegs = 1;
649 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
650 new_inst.Instruction.NumSrcRegs = 1;
651 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
652 tctx->emit_instruction(tctx, &new_inst);
653 }
654 }
655
656 /* LOG - Approximate Logarithm Base 2
657 * dst.x = \lfloor\log_2{|src.x|}\rfloor
658 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
659 * dst.z = \log_2{|src.x|}
660 * dst.w = 1.0
661 *
662 * ; needs: 1 tmp, imm{1.0}
663 * LG2 tmpA.x, |src.x|
664 * if (lowering FLR) {
665 * FRC tmpA.y, tmpA.x
666 * SUB tmpA.y, tmpA.x, tmpA.y
667 * } else {
668 * FLR tmpA.y, tmpA.x
669 * }
670 * EX2 tmpA.z, tmpA.y
671 * RCP tmpA.z, tmpA.z
672 * MUL dst.y, |src.x|, tmpA.z
673 * MOV dst.xz, tmpA.yx
674 * MOV dst.w, imm{1.0}
675 */
676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
677 NINST(2) + NINST(1) + NINST(1) - OINST(1))
678 #define LOG_TMP 1
679 static void
680 transform_log(struct tgsi_transform_context *tctx,
681 struct tgsi_full_instruction *inst)
682 {
683 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
684 struct tgsi_full_dst_register *dst = &inst->Dst[0];
685 struct tgsi_full_src_register *src = &inst->Src[0];
686 struct tgsi_full_instruction new_inst;
687
688 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
689 /* LG2 tmpA.x, |src.x| */
690 new_inst = tgsi_default_full_instruction();
691 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
692 new_inst.Instruction.NumDstRegs = 1;
693 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
694 new_inst.Instruction.NumSrcRegs = 1;
695 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
696 new_inst.Src[0].Register.Absolute = true;
697 tctx->emit_instruction(tctx, &new_inst);
698 }
699
700 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
701 if (ctx->config->lower_FLR) {
702 /* FRC tmpA.y, tmpA.x */
703 new_inst = tgsi_default_full_instruction();
704 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
705 new_inst.Instruction.NumDstRegs = 1;
706 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
707 new_inst.Instruction.NumSrcRegs = 1;
708 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
709 tctx->emit_instruction(tctx, &new_inst);
710
711 /* SUB tmpA.y, tmpA.x, tmpA.y */
712 new_inst = tgsi_default_full_instruction();
713 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
714 new_inst.Instruction.NumDstRegs = 1;
715 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
716 new_inst.Instruction.NumSrcRegs = 2;
717 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
718 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
719 new_inst.Src[1].Register.Negate = 1;
720 tctx->emit_instruction(tctx, &new_inst);
721 } else {
722 /* FLR tmpA.y, tmpA.x */
723 new_inst = tgsi_default_full_instruction();
724 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
725 new_inst.Instruction.NumDstRegs = 1;
726 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
727 new_inst.Instruction.NumSrcRegs = 1;
728 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
729 tctx->emit_instruction(tctx, &new_inst);
730 }
731 }
732
733 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
734 /* EX2 tmpA.z, tmpA.y */
735 new_inst = tgsi_default_full_instruction();
736 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
737 new_inst.Instruction.NumDstRegs = 1;
738 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
739 new_inst.Instruction.NumSrcRegs = 1;
740 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
741 tctx->emit_instruction(tctx, &new_inst);
742
743 /* RCP tmpA.z, tmpA.z */
744 new_inst = tgsi_default_full_instruction();
745 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
746 new_inst.Instruction.NumDstRegs = 1;
747 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
748 new_inst.Instruction.NumSrcRegs = 1;
749 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
750 tctx->emit_instruction(tctx, &new_inst);
751
752 /* MUL dst.y, |src.x|, tmpA.z */
753 new_inst = tgsi_default_full_instruction();
754 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
755 new_inst.Instruction.NumDstRegs = 1;
756 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
757 new_inst.Instruction.NumSrcRegs = 2;
758 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
759 new_inst.Src[0].Register.Absolute = true;
760 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
761 tctx->emit_instruction(tctx, &new_inst);
762 }
763
764 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
765 /* MOV dst.xz, tmpA.yx */
766 new_inst = tgsi_default_full_instruction();
767 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
768 new_inst.Instruction.NumDstRegs = 1;
769 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
770 new_inst.Instruction.NumSrcRegs = 1;
771 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
772 tctx->emit_instruction(tctx, &new_inst);
773 }
774
775 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
776 /* MOV dst.w, imm{1.0} */
777 new_inst = tgsi_default_full_instruction();
778 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
779 new_inst.Instruction.NumDstRegs = 1;
780 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
781 new_inst.Instruction.NumSrcRegs = 1;
782 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
783 tctx->emit_instruction(tctx, &new_inst);
784 }
785 }
786
787 /* DP4 - 4-component Dot Product
788 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
789 *
790 * DP3 - 3-component Dot Product
791 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
792 *
793 * DP2 - 2-component Dot Product
794 * dst = src0.x \times src1.x + src0.y \times src1.y
795 *
796 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
797 * operations, which is what you'd prefer for a ISA that is natively
798 * scalar. Probably a native vector ISA would at least already have
799 * DP4/DP3 instructions, but perhaps there is room for an alternative
800 * translation for DP2 using vector instructions.
801 *
802 * ; needs: 1 tmp
803 * MUL tmpA.x, src0.x, src1.x
804 * MAD tmpA.x, src0.y, src1.y, tmpA.x
805 * if (DP3 || DP4) {
806 * MAD tmpA.x, src0.z, src1.z, tmpA.x
807 * if (DP4) {
808 * MAD tmpA.x, src0.w, src1.w, tmpA.x
809 * }
810 * }
811 * ; fixup last instruction to replicate into dst
812 */
813 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
814 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
815 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
816 #define DOTP_TMP 1
817 static void
818 transform_dotp(struct tgsi_transform_context *tctx,
819 struct tgsi_full_instruction *inst)
820 {
821 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
822 struct tgsi_full_dst_register *dst = &inst->Dst[0];
823 struct tgsi_full_src_register *src0 = &inst->Src[0];
824 struct tgsi_full_src_register *src1 = &inst->Src[1];
825 struct tgsi_full_instruction new_inst;
826 enum tgsi_opcode opcode = inst->Instruction.Opcode;
827
828 /* NOTE: any potential last instruction must replicate src on all
829 * components (since it could be re-written to write to final dst)
830 */
831
832 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
833 /* MUL tmpA.x, src0.x, src1.x */
834 new_inst = tgsi_default_full_instruction();
835 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
836 new_inst.Instruction.NumDstRegs = 1;
837 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
838 new_inst.Instruction.NumSrcRegs = 2;
839 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
840 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
841 tctx->emit_instruction(tctx, &new_inst);
842
843 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
844 new_inst = tgsi_default_full_instruction();
845 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
846 new_inst.Instruction.NumDstRegs = 1;
847 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
848 new_inst.Instruction.NumSrcRegs = 3;
849 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
850 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
851 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
852
853 if ((opcode == TGSI_OPCODE_DP3) ||
854 (opcode == TGSI_OPCODE_DP4)) {
855 tctx->emit_instruction(tctx, &new_inst);
856
857 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
858 new_inst = tgsi_default_full_instruction();
859 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
860 new_inst.Instruction.NumDstRegs = 1;
861 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
862 new_inst.Instruction.NumSrcRegs = 3;
863 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
864 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
865 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
866
867 if (opcode == TGSI_OPCODE_DP4) {
868 tctx->emit_instruction(tctx, &new_inst);
869
870 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
871 new_inst = tgsi_default_full_instruction();
872 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
873 new_inst.Instruction.NumDstRegs = 1;
874 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
875 new_inst.Instruction.NumSrcRegs = 3;
876 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
877 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
878 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
879 }
880 }
881
882 /* fixup last instruction to write to dst: */
883 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
884
885 tctx->emit_instruction(tctx, &new_inst);
886 }
887 }
888
889 /* FLR - floor, CEIL - ceil
890 * ; needs: 1 tmp
891 * if (CEIL) {
892 * FRC tmpA, -src
893 * ADD dst, src, tmpA
894 * } else {
895 * FRC tmpA, src
896 * SUB dst, src, tmpA
897 * }
898 */
899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
901 #define FLR_TMP 1
902 #define CEIL_TMP 1
903 static void
904 transform_flr_ceil(struct tgsi_transform_context *tctx,
905 struct tgsi_full_instruction *inst)
906 {
907 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
908 struct tgsi_full_dst_register *dst = &inst->Dst[0];
909 struct tgsi_full_src_register *src0 = &inst->Src[0];
910 struct tgsi_full_instruction new_inst;
911 enum tgsi_opcode opcode = inst->Instruction.Opcode;
912
913 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
914 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
915 new_inst = tgsi_default_full_instruction();
916 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
917 new_inst.Instruction.NumDstRegs = 1;
918 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
919 new_inst.Instruction.NumSrcRegs = 1;
920 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
921
922 if (opcode == TGSI_OPCODE_CEIL)
923 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
924 tctx->emit_instruction(tctx, &new_inst);
925
926 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
927 new_inst = tgsi_default_full_instruction();
928 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
929 new_inst.Instruction.NumDstRegs = 1;
930 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
931 new_inst.Instruction.NumSrcRegs = 2;
932 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
933 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
934 if (opcode == TGSI_OPCODE_FLR)
935 new_inst.Src[1].Register.Negate = 1;
936 tctx->emit_instruction(tctx, &new_inst);
937 }
938 }
939
940 /* TRUNC - truncate off fractional part
941 * dst.x = trunc(src.x)
942 * dst.y = trunc(src.y)
943 * dst.z = trunc(src.z)
944 * dst.w = trunc(src.w)
945 *
946 * ; needs: 1 tmp
947 * if (lower FLR) {
948 * FRC tmpA, |src|
949 * SUB tmpA, |src|, tmpA
950 * } else {
951 * FLR tmpA, |src|
952 * }
953 * CMP dst, src, -tmpA, tmpA
954 */
955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
956 #define TRUNC_TMP 1
957 static void
958 transform_trunc(struct tgsi_transform_context *tctx,
959 struct tgsi_full_instruction *inst)
960 {
961 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
962 struct tgsi_full_dst_register *dst = &inst->Dst[0];
963 struct tgsi_full_src_register *src0 = &inst->Src[0];
964 struct tgsi_full_instruction new_inst;
965
966 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
967 if (ctx->config->lower_FLR) {
968 new_inst = tgsi_default_full_instruction();
969 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
970 new_inst.Instruction.NumDstRegs = 1;
971 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
972 new_inst.Instruction.NumSrcRegs = 1;
973 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
974 new_inst.Src[0].Register.Absolute = true;
975 new_inst.Src[0].Register.Negate = false;
976 tctx->emit_instruction(tctx, &new_inst);
977
978 new_inst = tgsi_default_full_instruction();
979 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
980 new_inst.Instruction.NumDstRegs = 1;
981 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
982 new_inst.Instruction.NumSrcRegs = 2;
983 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
984 new_inst.Src[0].Register.Absolute = true;
985 new_inst.Src[0].Register.Negate = false;
986 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
987 new_inst.Src[1].Register.Negate = 1;
988 tctx->emit_instruction(tctx, &new_inst);
989 } else {
990 new_inst = tgsi_default_full_instruction();
991 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
992 new_inst.Instruction.NumDstRegs = 1;
993 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
994 new_inst.Instruction.NumSrcRegs = 1;
995 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
996 new_inst.Src[0].Register.Absolute = true;
997 new_inst.Src[0].Register.Negate = false;
998 tctx->emit_instruction(tctx, &new_inst);
999 }
1000
1001 new_inst = tgsi_default_full_instruction();
1002 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1003 new_inst.Instruction.NumDstRegs = 1;
1004 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005 new_inst.Instruction.NumSrcRegs = 3;
1006 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1007 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1008 new_inst.Src[1].Register.Negate = true;
1009 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1010 tctx->emit_instruction(tctx, &new_inst);
1011 }
1012 }
1013
1014 /* Inserts a MOV_SAT for the needed components of tex coord. Note that
1015 * in the case of TXP, the clamping must happen *after* projection, so
1016 * we need to lower TXP to TEX.
1017 *
1018 * MOV tmpA, src0
1019 * if (opc == TXP) {
1020 * ; do perspective division manually before clamping:
1021 * RCP tmpB, tmpA.w
1022 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1023 * opc = TEX;
1024 * }
1025 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1026 * <opc> dst, tmpA, ...
1027 */
1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1029 #define SAMP_TMP 2
1030 static int
1031 transform_samp(struct tgsi_transform_context *tctx,
1032 struct tgsi_full_instruction *inst)
1033 {
1034 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1035 struct tgsi_full_src_register *coord = &inst->Src[0];
1036 struct tgsi_full_src_register *samp;
1037 struct tgsi_full_instruction new_inst;
1038 /* mask is clamped coords, pmask is all coords (for projection): */
1039 unsigned mask = 0, pmask = 0, smask;
1040 unsigned tex = inst->Texture.Texture;
1041 enum tgsi_opcode opcode = inst->Instruction.Opcode;
1042 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1043 (ctx->config->lower_TXP & (1 << tex));
1044
1045 if (opcode == TGSI_OPCODE_TXB2) {
1046 samp = &inst->Src[2];
1047 } else {
1048 samp = &inst->Src[1];
1049 }
1050
1051 /* convert sampler # to bitmask to test: */
1052 smask = 1 << samp->Register.Index;
1053
1054 /* check if we actually need to lower this one: */
1055 if (!(ctx->saturate & smask) && !lower_txp)
1056 return -1;
1057
1058 /* figure out which coordinates need saturating:
1059 * - RECT textures should not get saturated
1060 * - array index coords should not get saturated
1061 */
1062 switch (tex) {
1063 case TGSI_TEXTURE_3D:
1064 case TGSI_TEXTURE_CUBE:
1065 case TGSI_TEXTURE_CUBE_ARRAY:
1066 case TGSI_TEXTURE_SHADOWCUBE:
1067 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1068 if (ctx->config->saturate_r & smask)
1069 mask |= TGSI_WRITEMASK_Z;
1070 pmask |= TGSI_WRITEMASK_Z;
1071 /* fallthrough */
1072
1073 case TGSI_TEXTURE_2D:
1074 case TGSI_TEXTURE_2D_ARRAY:
1075 case TGSI_TEXTURE_SHADOW2D:
1076 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1077 case TGSI_TEXTURE_2D_MSAA:
1078 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1079 if (ctx->config->saturate_t & smask)
1080 mask |= TGSI_WRITEMASK_Y;
1081 pmask |= TGSI_WRITEMASK_Y;
1082 /* fallthrough */
1083
1084 case TGSI_TEXTURE_1D:
1085 case TGSI_TEXTURE_1D_ARRAY:
1086 case TGSI_TEXTURE_SHADOW1D:
1087 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1088 if (ctx->config->saturate_s & smask)
1089 mask |= TGSI_WRITEMASK_X;
1090 pmask |= TGSI_WRITEMASK_X;
1091 break;
1092
1093 case TGSI_TEXTURE_RECT:
1094 case TGSI_TEXTURE_SHADOWRECT:
1095 /* we don't saturate, but in case of lower_txp we
1096 * still need to do the perspective divide:
1097 */
1098 pmask = TGSI_WRITEMASK_XY;
1099 break;
1100 }
1101
1102 /* sanity check.. driver could be asking to saturate a non-
1103 * existent coordinate component:
1104 */
1105 if (!mask && !lower_txp)
1106 return -1;
1107
1108 /* MOV tmpA, src0 */
1109 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1110
1111 /* This is a bit sad.. we need to clamp *after* the coords
1112 * are projected, which means lowering TXP to TEX and doing
1113 * the projection ourself. But since I haven't figured out
1114 * how to make the lowering code deliver an electric shock
1115 * to anyone using GL_CLAMP, we must do this instead:
1116 */
1117 if (opcode == TGSI_OPCODE_TXP) {
1118 /* RCP tmpB.x tmpA.w */
1119 new_inst = tgsi_default_full_instruction();
1120 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1121 new_inst.Instruction.NumDstRegs = 1;
1122 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1123 new_inst.Instruction.NumSrcRegs = 1;
1124 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1125 tctx->emit_instruction(tctx, &new_inst);
1126
1127 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1128 new_inst = tgsi_default_full_instruction();
1129 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1130 new_inst.Instruction.NumDstRegs = 1;
1131 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1132 new_inst.Instruction.NumSrcRegs = 2;
1133 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1134 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1135 tctx->emit_instruction(tctx, &new_inst);
1136
1137 opcode = TGSI_OPCODE_TEX;
1138 }
1139
1140 /* MOV_SAT tmpA.<mask>, tmpA */
1141 if (mask) {
1142 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1143 }
1144
1145 /* modify the texture samp instruction to take fixed up coord: */
1146 new_inst = *inst;
1147 new_inst.Instruction.Opcode = opcode;
1148 new_inst.Src[0] = ctx->tmp[A].src;
1149 tctx->emit_instruction(tctx, &new_inst);
1150
1151 return 0;
1152 }
1153
1154 /* Two-sided color emulation:
1155 * For each COLOR input, create a corresponding BCOLOR input, plus
1156 * CMP instruction to select front or back color based on FACE
1157 */
1158 #define TWOSIDE_GROW(n) ( \
1159 2 + /* FACE */ \
1160 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161 ((n) * 1) + /* TEMP[] */ \
1162 ((n) * NINST(3)) /* CMP instr */ \
1163 )
1164
1165 static void
1166 emit_twoside(struct tgsi_transform_context *tctx)
1167 {
1168 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1169 struct tgsi_shader_info *info = ctx->info;
1170 struct tgsi_full_declaration decl;
1171 struct tgsi_full_instruction new_inst;
1172 unsigned inbase, tmpbase;
1173 unsigned i;
1174
1175 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1176 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1177
1178 /* additional inputs for BCOLOR's */
1179 for (i = 0; i < ctx->two_side_colors; i++) {
1180 unsigned in_idx = ctx->two_side_idx[i];
1181 decl = tgsi_default_full_declaration();
1182 decl.Declaration.File = TGSI_FILE_INPUT;
1183 decl.Declaration.Semantic = true;
1184 decl.Range.First = decl.Range.Last = inbase + i;
1185 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1186 decl.Semantic.Index = info->input_semantic_index[in_idx];
1187 decl.Declaration.Interpolate = true;
1188 decl.Interp.Interpolate = info->input_interpolate[in_idx];
1189 decl.Interp.Location = info->input_interpolate_loc[in_idx];
1190 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1191 tctx->emit_declaration(tctx, &decl);
1192 }
1193
1194 /* additional input for FACE */
1195 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196 decl = tgsi_default_full_declaration();
1197 decl.Declaration.File = TGSI_FILE_INPUT;
1198 decl.Declaration.Semantic = true;
1199 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201 decl.Semantic.Index = 0;
1202 tctx->emit_declaration(tctx, &decl);
1203
1204 ctx->face_idx = decl.Range.First;
1205 }
1206
1207 /* additional temps for COLOR/BCOLOR selection: */
1208 for (i = 0; i < ctx->two_side_colors; i++) {
1209 decl = tgsi_default_full_declaration();
1210 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212 tctx->emit_declaration(tctx, &decl);
1213 }
1214
1215 /* and finally additional instructions to select COLOR/BCOLOR: */
1216 for (i = 0; i < ctx->two_side_colors; i++) {
1217 new_inst = tgsi_default_full_instruction();
1218 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219
1220 new_inst.Instruction.NumDstRegs = 1;
1221 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1222 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224
1225 new_inst.Instruction.NumSrcRegs = 3;
1226 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1227 new_inst.Src[0].Register.Index = ctx->face_idx;
1228 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1233 new_inst.Src[1].Register.Index = inbase + i;
1234 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1239 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244
1245 tctx->emit_instruction(tctx, &new_inst);
1246 }
1247 }
1248
1249 static void
1250 emit_decls(struct tgsi_transform_context *tctx)
1251 {
1252 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253 struct tgsi_shader_info *info = ctx->info;
1254 struct tgsi_full_declaration decl;
1255 struct tgsi_full_immediate immed;
1256 unsigned tmpbase;
1257 unsigned i;
1258
1259 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260
1261 ctx->color_base = tmpbase + ctx->numtmp;
1262
1263 /* declare immediate: */
1264 immed = tgsi_default_full_immediate();
1265 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266 immed.u[0].Float = 0.0;
1267 immed.u[1].Float = 1.0;
1268 immed.u[2].Float = 128.0;
1269 immed.u[3].Float = 0.0;
1270 tctx->emit_immediate(tctx, &immed);
1271
1272 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273 ctx->imm.Register.Index = info->immediate_count;
1274 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278
1279 /* declare temp regs: */
1280 for (i = 0; i < ctx->numtmp; i++) {
1281 decl = tgsi_default_full_declaration();
1282 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283 decl.Range.First = decl.Range.Last = tmpbase + i;
1284 tctx->emit_declaration(tctx, &decl);
1285
1286 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1287 ctx->tmp[i].src.Register.Index = tmpbase + i;
1288 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292
1293 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1294 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296 }
1297
1298 if (ctx->two_side_colors)
1299 emit_twoside(tctx);
1300 }
1301
1302 static void
1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
1304 struct tgsi_full_instruction *inst)
1305 {
1306 unsigned i, j;
1307 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308 struct tgsi_src_register *src = &inst->Src[i].Register;
1309 if (src->File == TGSI_FILE_INPUT) {
1310 for (j = 0; j < ctx->two_side_colors; j++) {
1311 if (src->Index == (int)ctx->two_side_idx[j]) {
1312 src->File = TGSI_FILE_TEMPORARY;
1313 src->Index = ctx->color_base + j;
1314 break;
1315 }
1316 }
1317 }
1318 }
1319
1320 }
1321
1322 static void
1323 transform_instr(struct tgsi_transform_context *tctx,
1324 struct tgsi_full_instruction *inst)
1325 {
1326 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327
1328 if (!ctx->emitted_decls) {
1329 emit_decls(tctx);
1330 ctx->emitted_decls = 1;
1331 }
1332
1333 /* if emulating two-sided-color, we need to re-write some
1334 * src registers:
1335 */
1336 if (ctx->two_side_colors)
1337 rename_color_inputs(ctx, inst);
1338
1339 switch (inst->Instruction.Opcode) {
1340 case TGSI_OPCODE_DST:
1341 if (!ctx->config->lower_DST)
1342 goto skip;
1343 transform_dst(tctx, inst);
1344 break;
1345 case TGSI_OPCODE_LRP:
1346 if (!ctx->config->lower_LRP)
1347 goto skip;
1348 transform_lrp(tctx, inst);
1349 break;
1350 case TGSI_OPCODE_FRC:
1351 if (!ctx->config->lower_FRC)
1352 goto skip;
1353 transform_frc(tctx, inst);
1354 break;
1355 case TGSI_OPCODE_POW:
1356 if (!ctx->config->lower_POW)
1357 goto skip;
1358 transform_pow(tctx, inst);
1359 break;
1360 case TGSI_OPCODE_LIT:
1361 if (!ctx->config->lower_LIT)
1362 goto skip;
1363 transform_lit(tctx, inst);
1364 break;
1365 case TGSI_OPCODE_EXP:
1366 if (!ctx->config->lower_EXP)
1367 goto skip;
1368 transform_exp(tctx, inst);
1369 break;
1370 case TGSI_OPCODE_LOG:
1371 if (!ctx->config->lower_LOG)
1372 goto skip;
1373 transform_log(tctx, inst);
1374 break;
1375 case TGSI_OPCODE_DP4:
1376 if (!ctx->config->lower_DP4)
1377 goto skip;
1378 transform_dotp(tctx, inst);
1379 break;
1380 case TGSI_OPCODE_DP3:
1381 if (!ctx->config->lower_DP3)
1382 goto skip;
1383 transform_dotp(tctx, inst);
1384 break;
1385 case TGSI_OPCODE_DP2:
1386 if (!ctx->config->lower_DP2)
1387 goto skip;
1388 transform_dotp(tctx, inst);
1389 break;
1390 case TGSI_OPCODE_FLR:
1391 if (!ctx->config->lower_FLR)
1392 goto skip;
1393 transform_flr_ceil(tctx, inst);
1394 break;
1395 case TGSI_OPCODE_CEIL:
1396 if (!ctx->config->lower_CEIL)
1397 goto skip;
1398 transform_flr_ceil(tctx, inst);
1399 break;
1400 case TGSI_OPCODE_TRUNC:
1401 if (!ctx->config->lower_TRUNC)
1402 goto skip;
1403 transform_trunc(tctx, inst);
1404 break;
1405 case TGSI_OPCODE_TEX:
1406 case TGSI_OPCODE_TXP:
1407 case TGSI_OPCODE_TXB:
1408 case TGSI_OPCODE_TXB2:
1409 case TGSI_OPCODE_TXL:
1410 if (transform_samp(tctx, inst))
1411 goto skip;
1412 break;
1413 default:
1414 skip:
1415 tctx->emit_instruction(tctx, inst);
1416 break;
1417 }
1418 }
1419
1420 /* returns NULL if no lowering required, else returns the new
1421 * tokens (which caller is required to free()). In either case
1422 * returns the current info.
1423 */
1424 const struct tgsi_token *
1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426 const struct tgsi_token *tokens,
1427 struct tgsi_shader_info *info)
1428 {
1429 struct tgsi_lowering_context ctx;
1430 struct tgsi_token *newtoks;
1431 int newlen, numtmp;
1432
1433 /* sanity check in case limit is ever increased: */
1434 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435
1436 /* sanity check the lowering */
1437 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438 assert(!(config->lower_FRC && config->lower_TRUNC));
1439
1440 memset(&ctx, 0, sizeof(ctx));
1441 ctx.base.transform_instruction = transform_instr;
1442 ctx.info = info;
1443 ctx.config = config;
1444
1445 tgsi_scan_shader(tokens, info);
1446
1447 /* if we are adding fragment shader support to emulate two-sided
1448 * color, then figure out the number of additional inputs we need
1449 * to create for BCOLOR's..
1450 */
1451 if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452 config->color_two_side) {
1453 int i;
1454 ctx.face_idx = -1;
1455 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457 ctx.two_side_idx[ctx.two_side_colors++] = i;
1458 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459 ctx.face_idx = i;
1460 }
1461 }
1462
1463 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466 /* if there are no instructions to lower, then we are done: */
1467 if (!(OPCS(DST) ||
1468 OPCS(LRP) ||
1469 OPCS(FRC) ||
1470 OPCS(POW) ||
1471 OPCS(LIT) ||
1472 OPCS(EXP) ||
1473 OPCS(LOG) ||
1474 OPCS(DP4) ||
1475 OPCS(DP3) ||
1476 OPCS(DP2) ||
1477 OPCS(FLR) ||
1478 OPCS(CEIL) ||
1479 OPCS(TRUNC) ||
1480 OPCS(TXP) ||
1481 ctx.two_side_colors ||
1482 ctx.saturate))
1483 return NULL;
1484
1485 #if 0 /* debug */
1486 _debug_printf("BEFORE:");
1487 tgsi_dump(tokens, 0);
1488 #endif
1489
1490 numtmp = 0;
1491 newlen = tgsi_num_tokens(tokens);
1492 if (OPCS(DST)) {
1493 newlen += DST_GROW * OPCS(DST);
1494 numtmp = MAX2(numtmp, DST_TMP);
1495 }
1496 if (OPCS(LRP)) {
1497 newlen += LRP_GROW * OPCS(LRP);
1498 numtmp = MAX2(numtmp, LRP_TMP);
1499 }
1500 if (OPCS(FRC)) {
1501 newlen += FRC_GROW * OPCS(FRC);
1502 numtmp = MAX2(numtmp, FRC_TMP);
1503 }
1504 if (OPCS(POW)) {
1505 newlen += POW_GROW * OPCS(POW);
1506 numtmp = MAX2(numtmp, POW_TMP);
1507 }
1508 if (OPCS(LIT)) {
1509 newlen += LIT_GROW * OPCS(LIT);
1510 numtmp = MAX2(numtmp, LIT_TMP);
1511 }
1512 if (OPCS(EXP)) {
1513 newlen += EXP_GROW * OPCS(EXP);
1514 numtmp = MAX2(numtmp, EXP_TMP);
1515 }
1516 if (OPCS(LOG)) {
1517 newlen += LOG_GROW * OPCS(LOG);
1518 numtmp = MAX2(numtmp, LOG_TMP);
1519 }
1520 if (OPCS(DP4)) {
1521 newlen += DP4_GROW * OPCS(DP4);
1522 numtmp = MAX2(numtmp, DOTP_TMP);
1523 }
1524 if (OPCS(DP3)) {
1525 newlen += DP3_GROW * OPCS(DP3);
1526 numtmp = MAX2(numtmp, DOTP_TMP);
1527 }
1528 if (OPCS(DP2)) {
1529 newlen += DP2_GROW * OPCS(DP2);
1530 numtmp = MAX2(numtmp, DOTP_TMP);
1531 }
1532 if (OPCS(FLR)) {
1533 newlen += FLR_GROW * OPCS(FLR);
1534 numtmp = MAX2(numtmp, FLR_TMP);
1535 }
1536 if (OPCS(CEIL)) {
1537 newlen += CEIL_GROW * OPCS(CEIL);
1538 numtmp = MAX2(numtmp, CEIL_TMP);
1539 }
1540 if (OPCS(TRUNC)) {
1541 newlen += TRUNC_GROW * OPCS(TRUNC);
1542 numtmp = MAX2(numtmp, TRUNC_TMP);
1543 }
1544 if (ctx.saturate || config->lower_TXP) {
1545 int n = 0;
1546
1547 if (ctx.saturate) {
1548 n = info->opcode_count[TGSI_OPCODE_TEX] +
1549 info->opcode_count[TGSI_OPCODE_TXP] +
1550 info->opcode_count[TGSI_OPCODE_TXB] +
1551 info->opcode_count[TGSI_OPCODE_TXB2] +
1552 info->opcode_count[TGSI_OPCODE_TXL];
1553 } else if (config->lower_TXP) {
1554 n = info->opcode_count[TGSI_OPCODE_TXP];
1555 }
1556
1557 newlen += SAMP_GROW * n;
1558 numtmp = MAX2(numtmp, SAMP_TMP);
1559 }
1560
1561 /* specifically don't include two_side_colors temps in the count: */
1562 ctx.numtmp = numtmp;
1563
1564 if (ctx.two_side_colors) {
1565 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566 /* note: we permanently consume temp regs, re-writing references
1567 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568 * instruction that selects which varying to use):
1569 */
1570 numtmp += ctx.two_side_colors;
1571 }
1572
1573 newlen += 2 * numtmp;
1574 newlen += 5; /* immediate */
1575
1576 newtoks = tgsi_alloc_tokens(newlen);
1577 if (!newtoks)
1578 return NULL;
1579
1580 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1581
1582 tgsi_scan_shader(newtoks, info);
1583
1584 #if 0 /* debug */
1585 _debug_printf("AFTER:");
1586 tgsi_dump(newtoks, 0);
1587 #endif
1588
1589 return newtoks;
1590 }