ffc7eaea53f785fe226e9ce51416cf5b96a4b67f
[mesa.git] / src / gallium / drivers / freedreno / freedreno_lowering.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "tgsi/tgsi_transform.h"
30 #include "tgsi/tgsi_scan.h"
31 #include "tgsi/tgsi_dump.h"
32
33 #include "util/u_debug.h"
34 #include "util/u_math.h"
35
36 #include "freedreno_lowering.h"
37
38 struct fd_lowering_context {
39 struct tgsi_transform_context base;
40 const struct fd_lowering_config *config;
41 struct tgsi_shader_info *info;
42 unsigned two_side_colors;
43 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
44 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
45 int face_idx;
46 unsigned numtmp;
47 struct {
48 struct tgsi_full_src_register src;
49 struct tgsi_full_dst_register dst;
50 } tmp[2];
51 #define A 0
52 #define B 1
53 struct tgsi_full_src_register imm;
54 int emitted_decls;
55 };
56
/*
 * Convert the generic transform context handed to the callbacks into
 * the lowering context.  This is a plain pointer cast.
 */
static inline struct fd_lowering_context *
fd_lowering_context(struct tgsi_transform_context *tctx)
{
	struct fd_lowering_context *ctx = (struct fd_lowering_context *)tctx;

	return ctx;
}
62
63 /*
64 * Utility helpers:
65 */
66
67 static void
68 reg_dst(struct tgsi_full_dst_register *dst,
69 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
70 {
71 *dst = *orig_dst;
72 dst->Register.WriteMask &= wrmask;
73 assert(dst->Register.WriteMask);
74 }
75
76 static inline void
77 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
78 {
79 swiz[0] = src->SwizzleX;
80 swiz[1] = src->SwizzleY;
81 swiz[2] = src->SwizzleZ;
82 swiz[3] = src->SwizzleW;
83 }
84
85 static void
86 reg_src(struct tgsi_full_src_register *src,
87 const struct tgsi_full_src_register *orig_src,
88 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
89 {
90 unsigned swiz[4];
91 get_swiz(swiz, &orig_src->Register);
92 *src = *orig_src;
93 src->Register.SwizzleX = swiz[sx];
94 src->Register.SwizzleY = swiz[sy];
95 src->Register.SwizzleZ = swiz[sz];
96 src->Register.SwizzleW = swiz[sw];
97 }
98
/*
 * SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* values,
 * i.e. the trailing four arguments of reg_src().  '_' may be given for
 * a component whose value does not matter; it maps to X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w)                              \
	TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,        \
	TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
102
103 /*
104 * if (dst.x aliases src.x) {
105 * MOV tmpA.x, src.x
106 * src = tmpA
107 * }
108 * COS dst.x, src.x
109 * SIN dst.y, src.x
110 * MOV dst.zw, imm{0.0, 1.0}
111 */
112 static bool
113 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
114 const struct tgsi_full_src_register *src, unsigned src_mask)
115 {
116 if ((dst->Register.File == src->Register.File) &&
117 (dst->Register.Index == src->Register.Index)) {
118 unsigned i, actual_mask = 0;
119 unsigned swiz[4];
120 get_swiz(swiz, &src->Register);
121 for (i = 0; i < 4; i++)
122 if (src_mask & (1 << i))
123 actual_mask |= (1 << swiz[i]);
124 if (actual_mask & dst_mask)
125 return true;
126 }
127 return false;
128 }
129
130 static void
131 create_mov(struct tgsi_transform_context *tctx,
132 const struct tgsi_full_dst_register *dst,
133 const struct tgsi_full_src_register *src, unsigned mask)
134 {
135 struct tgsi_full_instruction new_inst;
136
137 new_inst = tgsi_default_full_instruction();
138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
139 new_inst.Instruction.NumDstRegs = 1;
140 reg_dst(&new_inst.Dst[0], dst, mask);
141 new_inst.Instruction.NumSrcRegs = 1;
142 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
143 tctx->emit_instruction(tctx, &new_inst);
144 }
145
146 /*
147 * Lowering Translators:
148 */
149
150 /* DST - Distance Vector
151 * dst.x = 1.0
152 * dst.y = src0.y \times src1.y
153 * dst.z = src0.z
154 * dst.w = src1.w
155 *
156 * ; note: could be more clever and use just a single temp
157 * ; if I was clever enough to re-write the swizzles.
158 * ; needs: 2 tmp, imm{1.0}
159 * if (dst.y aliases src0.z) {
160 * MOV tmpA.yz, src0.yz
161 * src0 = tmpA
162 * }
163 * if (dst.yz aliases src1.w) {
164 * MOV tmpB.yw, src1.yw
165 * src1 = tmpB
166 * }
167 * MUL dst.y, src0.y, src1.y
168 * MOV dst.z, src0.z
169 * MOV dst.w, src1.w
170 * MOV dst.x, imm{1.0}
171 */
172 #define DST_GROW (19 - 4)
173 #define DST_TMP 2
174 static void
175 transform_dst(struct tgsi_transform_context *tctx,
176 struct tgsi_full_instruction *inst)
177 {
178 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
179 struct tgsi_full_dst_register *dst = &inst->Dst[0];
180 struct tgsi_full_src_register *src0 = &inst->Src[0];
181 struct tgsi_full_src_register *src1 = &inst->Src[1];
182 struct tgsi_full_instruction new_inst;
183
184 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
185 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ);
186 src0 = &ctx->tmp[A].src;
187 }
188
189 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
190 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW);
191 src1 = &ctx->tmp[B].src;
192 }
193
194 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
195 /* MUL dst.y, src0.y, src1.y */
196 new_inst = tgsi_default_full_instruction();
197 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
198 new_inst.Instruction.NumDstRegs = 1;
199 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
200 new_inst.Instruction.NumSrcRegs = 2;
201 reg_src(&new_inst.Src[0], src0, SWIZ(_,Y,_,_));
202 reg_src(&new_inst.Src[1], src1, SWIZ(_,Y,_,_));
203 tctx->emit_instruction(tctx, &new_inst);
204 }
205
206 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
207 /* MOV dst.z, src0.z */
208 new_inst = tgsi_default_full_instruction();
209 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
210 new_inst.Instruction.NumDstRegs = 1;
211 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
212 new_inst.Instruction.NumSrcRegs = 1;
213 reg_src(&new_inst.Src[0], src0, SWIZ(_,_,Z,_));
214 tctx->emit_instruction(tctx, &new_inst);
215 }
216
217 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
218 /* MOV dst.w, src1.w */
219 new_inst = tgsi_default_full_instruction();
220 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
221 new_inst.Instruction.NumDstRegs = 1;
222 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
223 new_inst.Instruction.NumSrcRegs = 1;
224 reg_src(&new_inst.Src[0], src1, SWIZ(_,_,_,W));
225 tctx->emit_instruction(tctx, &new_inst);
226 }
227
228 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
229 /* MOV dst.x, imm{1.0} */
230 new_inst = tgsi_default_full_instruction();
231 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
232 new_inst.Instruction.NumDstRegs = 1;
233 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
234 new_inst.Instruction.NumSrcRegs = 1;
235 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,_));
236 tctx->emit_instruction(tctx, &new_inst);
237 }
238 }
239
240 /* XPD - Cross Product
241 * dst.x = src0.y \times src1.z - src1.y \times src0.z
242 * dst.y = src0.z \times src1.x - src1.z \times src0.x
243 * dst.z = src0.x \times src1.y - src1.x \times src0.y
244 * dst.w = 1.0
245 *
246 * ; needs: 2 tmp, imm{1.0}
247 * MUL tmpA.xyz, src0.yzx, src1.zxy
248 * MUL tmpB.xyz, src1.yzx, src0.zxy
249 * SUB dst.xyz, tmpA.xyz, tmpB.xyz
250 * MOV dst.w, imm{1.0}
251 */
252 #define XPD_GROW (15 - 4)
253 #define XPD_TMP 2
254 static void
255 transform_xpd(struct tgsi_transform_context *tctx,
256 struct tgsi_full_instruction *inst)
257 {
258 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
259 struct tgsi_full_dst_register *dst = &inst->Dst[0];
260 struct tgsi_full_src_register *src0 = &inst->Src[0];
261 struct tgsi_full_src_register *src1 = &inst->Src[1];
262 struct tgsi_full_instruction new_inst;
263
264 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
265 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
266 new_inst = tgsi_default_full_instruction();
267 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
268 new_inst.Instruction.NumDstRegs = 1;
269 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
270 new_inst.Instruction.NumSrcRegs = 2;
271 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Z,X,_));
272 reg_src(&new_inst.Src[1], src1, SWIZ(Z,X,Y,_));
273 tctx->emit_instruction(tctx, &new_inst);
274
275 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
276 new_inst = tgsi_default_full_instruction();
277 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
278 new_inst.Instruction.NumDstRegs = 1;
279 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
280 new_inst.Instruction.NumSrcRegs = 2;
281 reg_src(&new_inst.Src[0], src1, SWIZ(Y,Z,X,_));
282 reg_src(&new_inst.Src[1], src0, SWIZ(Z,X,Y,_));
283 tctx->emit_instruction(tctx, &new_inst);
284
285 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
286 new_inst = tgsi_default_full_instruction();
287 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
288 new_inst.Instruction.NumDstRegs = 1;
289 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
290 new_inst.Instruction.NumSrcRegs = 2;
291 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,_));
292 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,_));
293 tctx->emit_instruction(tctx, &new_inst);
294 }
295
296 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
297 /* MOV dst.w, imm{1.0} */
298 new_inst = tgsi_default_full_instruction();
299 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
300 new_inst.Instruction.NumDstRegs = 1;
301 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
302 new_inst.Instruction.NumSrcRegs = 1;
303 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
304 tctx->emit_instruction(tctx, &new_inst);
305 }
306 }
307
308 /* SCS - Sine Cosine
309 * dst.x = \cos{src.x}
310 * dst.y = \sin{src.x}
311 * dst.z = 0.0
312 * dst.w = 1.0
313 *
314 * ; needs: 1 tmp, imm{0.0, 1.0}
315 * if (dst.x aliases src.x) {
316 * MOV tmpA.x, src.x
317 * src = tmpA
318 * }
319 * COS dst.x, src.x
320 * SIN dst.y, src.x
321 * MOV dst.zw, imm{0.0, 1.0}
322 */
323 #define SCS_GROW (12 - 3)
324 #define SCS_TMP 1
325 static void
326 transform_scs(struct tgsi_transform_context *tctx,
327 struct tgsi_full_instruction *inst)
328 {
329 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
330 struct tgsi_full_dst_register *dst = &inst->Dst[0];
331 struct tgsi_full_src_register *src = &inst->Src[0];
332 struct tgsi_full_instruction new_inst;
333
334 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
335 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X);
336 src = &ctx->tmp[A].src;
337 }
338
339 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
340 /* COS dst.x, src.x */
341 new_inst = tgsi_default_full_instruction();
342 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
343 new_inst.Instruction.NumDstRegs = 1;
344 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
345 new_inst.Instruction.NumSrcRegs = 1;
346 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
347 tctx->emit_instruction(tctx, &new_inst);
348 }
349
350 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
351 /* SIN dst.y, src.x */
352 new_inst = tgsi_default_full_instruction();
353 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
354 new_inst.Instruction.NumDstRegs = 1;
355 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
356 new_inst.Instruction.NumSrcRegs = 1;
357 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
358 tctx->emit_instruction(tctx, &new_inst);
359 }
360
361 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
362 /* MOV dst.zw, imm{0.0, 1.0} */
363 new_inst = tgsi_default_full_instruction();
364 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
365 new_inst.Instruction.NumDstRegs = 1;
366 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
367 new_inst.Instruction.NumSrcRegs = 1;
368 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,X,Y));
369 tctx->emit_instruction(tctx, &new_inst);
370 }
371 }
372
373 /* LRP - Linear Interpolate
374 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
375 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
376 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
377 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
378 *
379 * ; needs: 2 tmp, imm{1.0}
380 * MUL tmpA, src0, src1
381 * SUB tmpB, imm{1.0}, src0
382 * MUL tmpB, tmpB, src2
383 * ADD dst, tmpA, tmpB
384 */
385 #define LRP_GROW (16 - 4)
386 #define LRP_TMP 2
387 static void
388 transform_lrp(struct tgsi_transform_context *tctx,
389 struct tgsi_full_instruction *inst)
390 {
391 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
392 struct tgsi_full_dst_register *dst = &inst->Dst[0];
393 struct tgsi_full_src_register *src0 = &inst->Src[0];
394 struct tgsi_full_src_register *src1 = &inst->Src[1];
395 struct tgsi_full_src_register *src2 = &inst->Src[2];
396 struct tgsi_full_instruction new_inst;
397
398 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
399 /* MUL tmpA, src0, src1 */
400 new_inst = tgsi_default_full_instruction();
401 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
402 new_inst.Instruction.NumDstRegs = 1;
403 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
404 new_inst.Instruction.NumSrcRegs = 2;
405 reg_src(&new_inst.Src[0], src0, SWIZ(X,Y,Z,W));
406 reg_src(&new_inst.Src[1], src1, SWIZ(X,Y,Z,W));
407 tctx->emit_instruction(tctx, &new_inst);
408
409 /* SUB tmpB, imm{1.0}, src0 */
410 new_inst = tgsi_default_full_instruction();
411 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
412 new_inst.Instruction.NumDstRegs = 1;
413 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
414 new_inst.Instruction.NumSrcRegs = 2;
415 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,Y,Y,Y));
416 reg_src(&new_inst.Src[1], src0, SWIZ(X,Y,Z,W));
417 tctx->emit_instruction(tctx, &new_inst);
418
419 /* MUL tmpB, tmpB, src2 */
420 new_inst = tgsi_default_full_instruction();
421 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
422 new_inst.Instruction.NumDstRegs = 1;
423 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
424 new_inst.Instruction.NumSrcRegs = 2;
425 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
426 reg_src(&new_inst.Src[1], src2, SWIZ(X,Y,Z,W));
427 tctx->emit_instruction(tctx, &new_inst);
428
429 /* ADD dst, tmpA, tmpB */
430 new_inst = tgsi_default_full_instruction();
431 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
432 new_inst.Instruction.NumDstRegs = 1;
433 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
434 new_inst.Instruction.NumSrcRegs = 2;
435 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
436 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
437 tctx->emit_instruction(tctx, &new_inst);
438 }
439 }
440
441 /* FRC - Fraction
442 * dst.x = src.x - \lfloor src.x\rfloor
443 * dst.y = src.y - \lfloor src.y\rfloor
444 * dst.z = src.z - \lfloor src.z\rfloor
445 * dst.w = src.w - \lfloor src.w\rfloor
446 *
447 * ; needs: 1 tmp
448 * FLR tmpA, src
449 * SUB dst, src, tmpA
450 */
451 #define FRC_GROW (7 - 3)
452 #define FRC_TMP 1
453 static void
454 transform_frc(struct tgsi_transform_context *tctx,
455 struct tgsi_full_instruction *inst)
456 {
457 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
458 struct tgsi_full_dst_register *dst = &inst->Dst[0];
459 struct tgsi_full_src_register *src = &inst->Src[0];
460 struct tgsi_full_instruction new_inst;
461
462 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
463 /* FLR tmpA, src */
464 new_inst = tgsi_default_full_instruction();
465 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
466 new_inst.Instruction.NumDstRegs = 1;
467 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
468 new_inst.Instruction.NumSrcRegs = 1;
469 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
470 tctx->emit_instruction(tctx, &new_inst);
471
472 /* SUB dst, src, tmpA */
473 new_inst = tgsi_default_full_instruction();
474 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
475 new_inst.Instruction.NumDstRegs = 1;
476 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
477 new_inst.Instruction.NumSrcRegs = 2;
478 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
479 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
480 tctx->emit_instruction(tctx, &new_inst);
481 }
482 }
483
484 /* POW - Power
485 * dst.x = src0.x^{src1.x}
486 * dst.y = src0.x^{src1.x}
487 * dst.z = src0.x^{src1.x}
488 * dst.w = src0.x^{src1.x}
489 *
490 * ; needs: 1 tmp
491 * LG2 tmpA.x, src0.x
492 * MUL tmpA.x, src1.x, tmpA.x
493 * EX2 dst, tmpA.x
494 */
495 #define POW_GROW (10 - 4)
496 #define POW_TMP 1
497 static void
498 transform_pow(struct tgsi_transform_context *tctx,
499 struct tgsi_full_instruction *inst)
500 {
501 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
502 struct tgsi_full_dst_register *dst = &inst->Dst[0];
503 struct tgsi_full_src_register *src0 = &inst->Src[0];
504 struct tgsi_full_src_register *src1 = &inst->Src[1];
505 struct tgsi_full_instruction new_inst;
506
507 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
508 /* LG2 tmpA.x, src0.x */
509 new_inst = tgsi_default_full_instruction();
510 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
511 new_inst.Instruction.NumDstRegs = 1;
512 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
513 new_inst.Instruction.NumSrcRegs = 1;
514 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
515 tctx->emit_instruction(tctx, &new_inst);
516
517 /* MUL tmpA.x, src1.x, tmpA.x */
518 new_inst = tgsi_default_full_instruction();
519 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
520 new_inst.Instruction.NumDstRegs = 1;
521 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
522 new_inst.Instruction.NumSrcRegs = 2;
523 reg_src(&new_inst.Src[0], src1, SWIZ(X,_,_,_));
524 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,_,_,_));
525 tctx->emit_instruction(tctx, &new_inst);
526
527 /* EX2 dst, tmpA.x */
528 new_inst = tgsi_default_full_instruction();
529 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
530 new_inst.Instruction.NumDstRegs = 1;
531 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
532 new_inst.Instruction.NumSrcRegs = 1;
533 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
534 tctx->emit_instruction(tctx, &new_inst);
535 }
536 }
537
538 /* LIT - Light Coefficients
539 * dst.x = 1.0
540 * dst.y = max(src.x, 0.0)
541 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
542 * dst.w = 1.0
543 *
544 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
545 * MAX tmpA.xy, src.xy, imm{0.0}
546 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
547 * LG2 tmpA.y, tmpA.y
548 * MUL tmpA.y, tmpA.z, tmpA.y
549 * EX2 tmpA.y, tmpA.y
550 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
551 * MOV dst.yz, tmpA.xy
552 * MOV dst.xw, imm{1.0}
553 */
554 #define LIT_GROW (30 - 3)
555 #define LIT_TMP 1
556 static void
557 transform_lit(struct tgsi_transform_context *tctx,
558 struct tgsi_full_instruction *inst)
559 {
560 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
561 struct tgsi_full_dst_register *dst = &inst->Dst[0];
562 struct tgsi_full_src_register *src = &inst->Src[0];
563 struct tgsi_full_instruction new_inst;
564
565 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
566 /* MAX tmpA.xy, src.xy, imm{0.0} */
567 new_inst = tgsi_default_full_instruction();
568 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
569 new_inst.Instruction.NumDstRegs = 1;
570 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
571 new_inst.Instruction.NumSrcRegs = 2;
572 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,_,_));
573 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X,X,_,_));
574 tctx->emit_instruction(tctx, &new_inst);
575
576 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
577 new_inst = tgsi_default_full_instruction();
578 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
579 new_inst.Instruction.NumDstRegs = 1;
580 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
581 new_inst.Instruction.NumSrcRegs = 3;
582 reg_src(&new_inst.Src[0], src, SWIZ(_,_,W,_));
583 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_,_,Z,_));
584 new_inst.Src[1].Register.Negate = true;
585 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,_,Z,_));
586 tctx->emit_instruction(tctx, &new_inst);
587
588 /* LG2 tmpA.y, tmpA.y */
589 new_inst = tgsi_default_full_instruction();
590 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
591 new_inst.Instruction.NumDstRegs = 1;
592 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
593 new_inst.Instruction.NumSrcRegs = 1;
594 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
595 tctx->emit_instruction(tctx, &new_inst);
596
597 /* MUL tmpA.y, tmpA.z, tmpA.y */
598 new_inst = tgsi_default_full_instruction();
599 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
600 new_inst.Instruction.NumDstRegs = 1;
601 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
602 new_inst.Instruction.NumSrcRegs = 2;
603 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
604 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
605 tctx->emit_instruction(tctx, &new_inst);
606
607 /* EX2 tmpA.y, tmpA.y */
608 new_inst = tgsi_default_full_instruction();
609 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
610 new_inst.Instruction.NumDstRegs = 1;
611 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
612 new_inst.Instruction.NumSrcRegs = 1;
613 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
614 tctx->emit_instruction(tctx, &new_inst);
615
616 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
617 new_inst = tgsi_default_full_instruction();
618 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
619 new_inst.Instruction.NumDstRegs = 1;
620 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
621 new_inst.Instruction.NumSrcRegs = 3;
622 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
623 new_inst.Src[0].Register.Negate = true;
624 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
625 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,X,_,_));
626 tctx->emit_instruction(tctx, &new_inst);
627
628 /* MOV dst.yz, tmpA.xy */
629 new_inst = tgsi_default_full_instruction();
630 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
631 new_inst.Instruction.NumDstRegs = 1;
632 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
633 new_inst.Instruction.NumSrcRegs = 1;
634 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,Y,_));
635 tctx->emit_instruction(tctx, &new_inst);
636 }
637
638 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
639 /* MOV dst.xw, imm{1.0} */
640 new_inst = tgsi_default_full_instruction();
641 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
642 new_inst.Instruction.NumDstRegs = 1;
643 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
644 new_inst.Instruction.NumSrcRegs = 1;
645 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,Y));
646 tctx->emit_instruction(tctx, &new_inst);
647 }
648 }
649
650 /* EXP - Approximate Exponential Base 2
651 * dst.x = 2^{\lfloor src.x\rfloor}
652 * dst.y = src.x - \lfloor src.x\rfloor
653 * dst.z = 2^{src.x}
654 * dst.w = 1.0
655 *
656 * ; needs: 1 tmp, imm{1.0}
657 * FLR tmpA.x, src.x
658 * EX2 tmpA.y, src.x
659 * SUB dst.y, src.x, tmpA.x
660 * EX2 dst.x, tmpA.x
661 * MOV dst.z, tmpA.y
662 * MOV dst.w, imm{1.0}
663 */
664 #define EXP_GROW (19 - 3)
665 #define EXP_TMP 1
666 static void
667 transform_exp(struct tgsi_transform_context *tctx,
668 struct tgsi_full_instruction *inst)
669 {
670 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
671 struct tgsi_full_dst_register *dst = &inst->Dst[0];
672 struct tgsi_full_src_register *src = &inst->Src[0];
673 struct tgsi_full_instruction new_inst;
674
675 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
676 /* FLR tmpA.x, src.x */
677 new_inst = tgsi_default_full_instruction();
678 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
679 new_inst.Instruction.NumDstRegs = 1;
680 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
681 new_inst.Instruction.NumSrcRegs = 1;
682 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
683 tctx->emit_instruction(tctx, &new_inst);
684 }
685
686 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
687 /* EX2 tmpA.y, src.x */
688 new_inst = tgsi_default_full_instruction();
689 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
690 new_inst.Instruction.NumDstRegs = 1;
691 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
692 new_inst.Instruction.NumSrcRegs = 1;
693 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
694 tctx->emit_instruction(tctx, &new_inst);
695 }
696
697 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
698 /* SUB dst.y, src.x, tmpA.x */
699 new_inst = tgsi_default_full_instruction();
700 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
701 new_inst.Instruction.NumDstRegs = 1;
702 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
703 new_inst.Instruction.NumSrcRegs = 2;
704 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
705 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,X,_,_));
706 tctx->emit_instruction(tctx, &new_inst);
707 }
708
709 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
710 /* EX2 dst.x, tmpA.x */
711 new_inst = tgsi_default_full_instruction();
712 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
713 new_inst.Instruction.NumDstRegs = 1;
714 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
715 new_inst.Instruction.NumSrcRegs = 1;
716 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
717 tctx->emit_instruction(tctx, &new_inst);
718 }
719
720 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
721 /* MOV dst.z, tmpA.y */
722 new_inst = tgsi_default_full_instruction();
723 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
724 new_inst.Instruction.NumDstRegs = 1;
725 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
726 new_inst.Instruction.NumSrcRegs = 1;
727 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,_,Y,_));
728 tctx->emit_instruction(tctx, &new_inst);
729 }
730
731 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
732 /* MOV dst.w, imm{1.0} */
733 new_inst = tgsi_default_full_instruction();
734 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
735 new_inst.Instruction.NumDstRegs = 1;
736 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
737 new_inst.Instruction.NumSrcRegs = 1;
738 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
739 tctx->emit_instruction(tctx, &new_inst);
740 }
741 }
742
743 /* LOG - Approximate Logarithm Base 2
744 * dst.x = \lfloor\log_2{|src.x|}\rfloor
745 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
746 * dst.z = \log_2{|src.x|}
747 * dst.w = 1.0
748 *
749 * ; needs: 1 tmp, imm{1.0}
750 * LG2 tmpA.x, |src.x|
751 * FLR tmpA.y, tmpA.x
752 * EX2 tmpA.z, tmpA.y
753 * RCP tmpA.z, tmpA.z
754 * MUL dst.y, |src.x|, tmpA.z
755 * MOV dst.xz, tmpA.yx
756 * MOV dst.w, imm{1.0}
757 */
758 #define LOG_GROW (25 - 3)
759 #define LOG_TMP 1
760 static void
761 transform_log(struct tgsi_transform_context *tctx,
762 struct tgsi_full_instruction *inst)
763 {
764 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
765 struct tgsi_full_dst_register *dst = &inst->Dst[0];
766 struct tgsi_full_src_register *src = &inst->Src[0];
767 struct tgsi_full_instruction new_inst;
768
769 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
770 /* LG2 tmpA.x, |src.x| */
771 new_inst = tgsi_default_full_instruction();
772 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
773 new_inst.Instruction.NumDstRegs = 1;
774 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
775 new_inst.Instruction.NumSrcRegs = 1;
776 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
777 new_inst.Src[0].Register.Absolute = true;
778 tctx->emit_instruction(tctx, &new_inst);
779 }
780
781 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
782 /* FLR tmpA.y, tmpA.x */
783 new_inst = tgsi_default_full_instruction();
784 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
785 new_inst.Instruction.NumDstRegs = 1;
786 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
787 new_inst.Instruction.NumSrcRegs = 1;
788 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,_,_));
789 tctx->emit_instruction(tctx, &new_inst);
790 }
791
792 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
793 /* EX2 tmpA.z, tmpA.y */
794 new_inst = tgsi_default_full_instruction();
795 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
796 new_inst.Instruction.NumDstRegs = 1;
797 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
798 new_inst.Instruction.NumSrcRegs = 1;
799 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
800 tctx->emit_instruction(tctx, &new_inst);
801
802 /* RCP tmpA.z, tmpA.z */
803 new_inst = tgsi_default_full_instruction();
804 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
805 new_inst.Instruction.NumDstRegs = 1;
806 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
807 new_inst.Instruction.NumSrcRegs = 1;
808 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z,_,_,_));
809 tctx->emit_instruction(tctx, &new_inst);
810
811 /* MUL dst.y, |src.x|, tmpA.z */
812 new_inst = tgsi_default_full_instruction();
813 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
814 new_inst.Instruction.NumDstRegs = 1;
815 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
816 new_inst.Instruction.NumSrcRegs = 2;
817 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
818 new_inst.Src[0].Register.Absolute = true;
819 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
820 tctx->emit_instruction(tctx, &new_inst);
821 }
822
823 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
824 /* MOV dst.xz, tmpA.yx */
825 new_inst = tgsi_default_full_instruction();
826 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
827 new_inst.Instruction.NumDstRegs = 1;
828 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
829 new_inst.Instruction.NumSrcRegs = 1;
830 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,X,_));
831 tctx->emit_instruction(tctx, &new_inst);
832 }
833
834 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
835 /* MOV dst.w, imm{1.0} */
836 new_inst = tgsi_default_full_instruction();
837 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
838 new_inst.Instruction.NumDstRegs = 1;
839 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
840 new_inst.Instruction.NumSrcRegs = 1;
841 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
842 tctx->emit_instruction(tctx, &new_inst);
843 }
844 }
845
846 /* DP4 - 4-component Dot Product
847 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
848 *
849 * DP3 - 3-component Dot Product
850 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
851 *
852 * DPH - Homogeneous Dot Product
853 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
854 *
855 * DP2 - 2-component Dot Product
856 * dst = src0.x \times src1.x + src0.y \times src1.y
857 *
858 * DP2A - 2-component Dot Product And Add
859 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
860 *
861 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
862 * operations, which is what you'd prefer for a ISA that is natively
863 * scalar. Probably a native vector ISA would at least already have
864 * DP4/DP3 instructions, but perhaps there is room for an alternative
865 * translation for DPH/DP2/DP2A using vector instructions.
866 *
867 * ; needs: 1 tmp
868 * MUL tmpA.x, src0.x, src1.x
869 * MAD tmpA.x, src0.y, src1.y, tmpA.x
870 * if (DPH || DP3 || DP4) {
871 * MAD tmpA.x, src0.z, src1.z, tmpA.x
872 * if (DPH) {
873 * ADD tmpA.x, src1.w, tmpA.x
874 * } else if (DP4) {
875 * MAD tmpA.x, src0.w, src1.w, tmpA.x
876 * }
877 * } else if (DP2A) {
878 * ADD tmpA.x, src2.x, tmpA.x
879 * }
880 * ; fixup last instruction to replicate into dst
881 */
882 #define DP4_GROW (19 - 4)
883 #define DP3_GROW (14 - 4)
884 #define DPH_GROW (18 - 4)
885 #define DP2_GROW ( 9 - 4)
886 #define DP2A_GROW (13 - 4)
887 #define DOTP_TMP 1
888 static void
889 transform_dotp(struct tgsi_transform_context *tctx,
890 struct tgsi_full_instruction *inst)
891 {
892 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
893 struct tgsi_full_dst_register *dst = &inst->Dst[0];
894 struct tgsi_full_src_register *src0 = &inst->Src[0];
895 struct tgsi_full_src_register *src1 = &inst->Src[1];
896 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
897 struct tgsi_full_instruction new_inst;
898 unsigned opcode = inst->Instruction.Opcode;
899
900 /* NOTE: any potential last instruction must replicate src on all
901 * components (since it could be re-written to write to final dst)
902 */
903
904 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
905 /* MUL tmpA.x, src0.x, src1.x */
906 new_inst = tgsi_default_full_instruction();
907 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
908 new_inst.Instruction.NumDstRegs = 1;
909 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
910 new_inst.Instruction.NumSrcRegs = 2;
911 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
912 reg_src(&new_inst.Src[1], src1, SWIZ(X,_,_,_));
913 tctx->emit_instruction(tctx, &new_inst);
914
915 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
916 new_inst = tgsi_default_full_instruction();
917 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
918 new_inst.Instruction.NumDstRegs = 1;
919 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
920 new_inst.Instruction.NumSrcRegs = 3;
921 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Y,Y,Y));
922 reg_src(&new_inst.Src[1], src1, SWIZ(Y,Y,Y,Y));
923 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
924
925 if ((opcode == TGSI_OPCODE_DPH) ||
926 (opcode == TGSI_OPCODE_DP3) ||
927 (opcode == TGSI_OPCODE_DP4)) {
928 tctx->emit_instruction(tctx, &new_inst);
929
930 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
931 new_inst = tgsi_default_full_instruction();
932 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
933 new_inst.Instruction.NumDstRegs = 1;
934 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
935 new_inst.Instruction.NumSrcRegs = 3;
936 reg_src(&new_inst.Src[0], src0, SWIZ(Z,Z,Z,Z));
937 reg_src(&new_inst.Src[1], src1, SWIZ(Z,Z,Z,Z));
938 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
939
940 if (opcode == TGSI_OPCODE_DPH) {
941 tctx->emit_instruction(tctx, &new_inst);
942
943 /* ADD tmpA.x, src1.w, tmpA.x */
944 new_inst = tgsi_default_full_instruction();
945 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
946 new_inst.Instruction.NumDstRegs = 1;
947 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
948 new_inst.Instruction.NumSrcRegs = 2;
949 reg_src(&new_inst.Src[0], src1, SWIZ(W,W,W,W));
950 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
951 } else if (opcode == TGSI_OPCODE_DP4) {
952 tctx->emit_instruction(tctx, &new_inst);
953
954 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
955 new_inst = tgsi_default_full_instruction();
956 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
957 new_inst.Instruction.NumDstRegs = 1;
958 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
959 new_inst.Instruction.NumSrcRegs = 3;
960 reg_src(&new_inst.Src[0], src0, SWIZ(W,W,W,W));
961 reg_src(&new_inst.Src[1], src1, SWIZ(W,W,W,W));
962 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
963 }
964 } else if (opcode == TGSI_OPCODE_DP2A) {
965 tctx->emit_instruction(tctx, &new_inst);
966
967 /* ADD tmpA.x, src2.x, tmpA.x */
968 new_inst = tgsi_default_full_instruction();
969 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
970 new_inst.Instruction.NumDstRegs = 1;
971 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
972 new_inst.Instruction.NumSrcRegs = 2;
973 reg_src(&new_inst.Src[0], src2, SWIZ(X,X,X,X));
974 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
975 }
976
977 /* fixup last instruction to write to dst: */
978 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
979
980 tctx->emit_instruction(tctx, &new_inst);
981 }
982 }
983
984
985 /* Two-sided color emulation:
986 * For each COLOR input, create a corresponding BCOLOR input, plus
987 * CMP instruction to select front or back color based on FACE
988 */
989 #define TWOSIDE_GROW(n) ( \
990 2 + /* FACE */ \
991 ((n) * 2) + /* IN[] BCOLOR[n] */ \
992 ((n) * 1) + /* TEMP[] */ \
993 ((n) * 5) /* CMP instr */ \
994 )
995
996 static void
997 emit_twoside(struct tgsi_transform_context *tctx)
998 {
999 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1000 struct tgsi_shader_info *info = ctx->info;
1001 struct tgsi_full_declaration decl;
1002 struct tgsi_full_instruction new_inst;
1003 unsigned inbase, tmpbase;
1004 int i;
1005
1006 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1007 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1008
1009 /* additional inputs for BCOLOR's */
1010 for (i = 0; i < ctx->two_side_colors; i++) {
1011 decl = tgsi_default_full_declaration();
1012 decl.Declaration.File = TGSI_FILE_INPUT;
1013 decl.Declaration.Semantic = true;
1014 decl.Range.First = decl.Range.Last = inbase + i;
1015 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1016 decl.Semantic.Index =
1017 info->input_semantic_index[ctx->two_side_idx[i]];
1018 tctx->emit_declaration(tctx, &decl);
1019 }
1020
1021 /* additional input for FACE */
1022 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1023 decl = tgsi_default_full_declaration();
1024 decl.Declaration.File = TGSI_FILE_INPUT;
1025 decl.Declaration.Semantic = true;
1026 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1027 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1028 decl.Semantic.Index = 0;
1029 tctx->emit_declaration(tctx, &decl);
1030
1031 ctx->face_idx = decl.Range.First;
1032 }
1033
1034 /* additional temps for COLOR/BCOLOR selection: */
1035 for (i = 0; i < ctx->two_side_colors; i++) {
1036 decl = tgsi_default_full_declaration();
1037 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1038 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1039 tctx->emit_declaration(tctx, &decl);
1040 }
1041
1042 /* and finally additional instructions to select COLOR/BCOLOR: */
1043 for (i = 0; i < ctx->two_side_colors; i++) {
1044 new_inst = tgsi_default_full_instruction();
1045 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1046
1047 new_inst.Instruction.NumDstRegs = 1;
1048 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1049 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1050 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1051
1052 new_inst.Instruction.NumSrcRegs = 3;
1053 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1054 new_inst.Src[0].Register.Index = ctx->face_idx;
1055 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1056 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1057 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1058 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1059 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1060 new_inst.Src[1].Register.Index = inbase + i;
1061 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1062 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1063 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1064 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1065 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1066 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1067 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1068 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1069 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1070 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1071
1072 tctx->emit_instruction(tctx, &new_inst);
1073 }
1074 }
1075
1076 static void
1077 emit_decls(struct tgsi_transform_context *tctx)
1078 {
1079 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1080 struct tgsi_shader_info *info = ctx->info;
1081 struct tgsi_full_declaration decl;
1082 struct tgsi_full_immediate immed;
1083 unsigned tmpbase;
1084 int i;
1085
1086 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1087
1088 ctx->color_base = tmpbase + ctx->numtmp;
1089
1090 /* declare immediate: */
1091 immed = tgsi_default_full_immediate();
1092 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1093 immed.u[0].Float = 0.0;
1094 immed.u[1].Float = 1.0;
1095 immed.u[2].Float = 128.0;
1096 immed.u[3].Float = 0.0;
1097 tctx->emit_immediate(tctx, &immed);
1098
1099 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1100 ctx->imm.Register.Index = info->immediate_count;
1101 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1102 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1103 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1104 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1105
1106 /* declare temp regs: */
1107 for (i = 0; i < ctx->numtmp; i++) {
1108 decl = tgsi_default_full_declaration();
1109 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1110 decl.Range.First = decl.Range.Last = tmpbase + i;
1111 tctx->emit_declaration(tctx, &decl);
1112
1113 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1114 ctx->tmp[i].src.Register.Index = tmpbase + i;
1115 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1116 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1117 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1118 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1119
1120 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1121 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1122 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1123 }
1124
1125 if (ctx->two_side_colors)
1126 emit_twoside(tctx);
1127 }
1128
1129 static void
1130 rename_color_inputs(struct fd_lowering_context *ctx,
1131 struct tgsi_full_instruction *inst)
1132 {
1133 unsigned i, j;
1134 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1135 struct tgsi_src_register *src = &inst->Src[i].Register;
1136 if (src->File == TGSI_FILE_INPUT) {
1137 for (j = 0; j < ctx->two_side_colors; j++) {
1138 if (src->Index == ctx->two_side_idx[j]) {
1139 src->File = TGSI_FILE_TEMPORARY;
1140 src->Index = ctx->color_base + j;
1141 break;
1142 }
1143 }
1144 }
1145 }
1146
1147 }
1148
1149 static void
1150 transform_instr(struct tgsi_transform_context *tctx,
1151 struct tgsi_full_instruction *inst)
1152 {
1153 struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1154
1155 if (!ctx->emitted_decls) {
1156 emit_decls(tctx);
1157 ctx->emitted_decls = 1;
1158 }
1159
1160 /* if emulating two-sided-color, we need to re-write some
1161 * src registers:
1162 */
1163 if (ctx->two_side_colors)
1164 rename_color_inputs(ctx, inst);
1165
1166 switch (inst->Instruction.Opcode) {
1167 case TGSI_OPCODE_DST:
1168 if (!ctx->config->lower_DST)
1169 goto skip;
1170 transform_dst(tctx, inst);
1171 break;
1172 case TGSI_OPCODE_XPD:
1173 if (!ctx->config->lower_XPD)
1174 goto skip;
1175 transform_xpd(tctx, inst);
1176 break;
1177 case TGSI_OPCODE_SCS:
1178 if (!ctx->config->lower_SCS)
1179 goto skip;
1180 transform_scs(tctx, inst);
1181 break;
1182 case TGSI_OPCODE_LRP:
1183 if (!ctx->config->lower_LRP)
1184 goto skip;
1185 transform_lrp(tctx, inst);
1186 break;
1187 case TGSI_OPCODE_FRC:
1188 if (!ctx->config->lower_FRC)
1189 goto skip;
1190 transform_frc(tctx, inst);
1191 break;
1192 case TGSI_OPCODE_POW:
1193 if (!ctx->config->lower_POW)
1194 goto skip;
1195 transform_pow(tctx, inst);
1196 break;
1197 case TGSI_OPCODE_LIT:
1198 if (!ctx->config->lower_LIT)
1199 goto skip;
1200 transform_lit(tctx, inst);
1201 break;
1202 case TGSI_OPCODE_EXP:
1203 if (!ctx->config->lower_EXP)
1204 goto skip;
1205 transform_exp(tctx, inst);
1206 break;
1207 case TGSI_OPCODE_LOG:
1208 if (!ctx->config->lower_LOG)
1209 goto skip;
1210 transform_log(tctx, inst);
1211 break;
1212 case TGSI_OPCODE_DP4:
1213 if (!ctx->config->lower_DP4)
1214 goto skip;
1215 transform_dotp(tctx, inst);
1216 break;
1217 case TGSI_OPCODE_DP3:
1218 if (!ctx->config->lower_DP3)
1219 goto skip;
1220 transform_dotp(tctx, inst);
1221 break;
1222 case TGSI_OPCODE_DPH:
1223 if (!ctx->config->lower_DPH)
1224 goto skip;
1225 transform_dotp(tctx, inst);
1226 break;
1227 case TGSI_OPCODE_DP2:
1228 if (!ctx->config->lower_DP2)
1229 goto skip;
1230 transform_dotp(tctx, inst);
1231 break;
1232 case TGSI_OPCODE_DP2A:
1233 if (!ctx->config->lower_DP2A)
1234 goto skip;
1235 transform_dotp(tctx, inst);
1236 break;
1237 default:
1238 skip:
1239 tctx->emit_instruction(tctx, inst);
1240 break;
1241 }
1242 }
1243
1244 /* returns NULL if no lowering required, else returns the new
1245 * tokens (which caller is required to free()). In either case
1246 * returns the current info.
1247 */
1248 const struct tgsi_token *
1249 fd_transform_lowering(const struct fd_lowering_config *config,
1250 const struct tgsi_token *tokens,
1251 struct tgsi_shader_info *info)
1252 {
1253 struct fd_lowering_context ctx;
1254 struct tgsi_token *newtoks;
1255 int newlen, numtmp;
1256
1257 memset(&ctx, 0, sizeof(ctx));
1258 ctx.base.transform_instruction = transform_instr;
1259 ctx.info = info;
1260 ctx.config = config;
1261
1262 tgsi_scan_shader(tokens, info);
1263
1264 /* if we are adding fragment shader support to emulate two-sided
1265 * color, then figure out the number of additional inputs we need
1266 * to create for BCOLOR's..
1267 */
1268 if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1269 config->color_two_side) {
1270 int i;
1271 ctx.face_idx = -1;
1272 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1273 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1274 ctx.two_side_idx[ctx.two_side_colors++] = i;
1275 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1276 ctx.face_idx = i;
1277 }
1278 }
1279
1280 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1281 /* if there are no instructions to lower, then we are done: */
1282 if (!(OPCS(DST) ||
1283 OPCS(XPD) ||
1284 OPCS(SCS) ||
1285 OPCS(LRP) ||
1286 OPCS(FRC) ||
1287 OPCS(POW) ||
1288 OPCS(LIT) ||
1289 OPCS(EXP) ||
1290 OPCS(LOG) ||
1291 OPCS(DP4) ||
1292 OPCS(DP3) ||
1293 OPCS(DPH) ||
1294 OPCS(DP2) ||
1295 OPCS(DP2A) ||
1296 ctx.two_side_colors))
1297 return NULL;
1298
1299 #if 0 /* debug */
1300 _debug_printf("BEFORE:");
1301 tgsi_dump(tokens, 0);
1302 #endif
1303
1304 numtmp = 0;
1305 newlen = tgsi_num_tokens(tokens);
1306 if (OPCS(DST)) {
1307 newlen += DST_GROW * OPCS(DST);
1308 numtmp = MAX2(numtmp, DST_TMP);
1309 }
1310 if (OPCS(XPD)) {
1311 newlen += XPD_GROW * OPCS(XPD);
1312 numtmp = MAX2(numtmp, XPD_TMP);
1313 }
1314 if (OPCS(SCS)) {
1315 newlen += SCS_GROW * OPCS(SCS);
1316 numtmp = MAX2(numtmp, SCS_TMP);
1317 }
1318 if (OPCS(LRP)) {
1319 newlen += LRP_GROW * OPCS(LRP);
1320 numtmp = MAX2(numtmp, LRP_TMP);
1321 }
1322 if (OPCS(FRC)) {
1323 newlen += FRC_GROW * OPCS(FRC);
1324 numtmp = MAX2(numtmp, FRC_TMP);
1325 }
1326 if (OPCS(POW)) {
1327 newlen += POW_GROW * OPCS(POW);
1328 numtmp = MAX2(numtmp, POW_TMP);
1329 }
1330 if (OPCS(LIT)) {
1331 newlen += LIT_GROW * OPCS(LIT);
1332 numtmp = MAX2(numtmp, LIT_TMP);
1333 }
1334 if (OPCS(EXP)) {
1335 newlen += EXP_GROW * OPCS(EXP);
1336 numtmp = MAX2(numtmp, EXP_TMP);
1337 }
1338 if (OPCS(LOG)) {
1339 newlen += LOG_GROW * OPCS(LOG);
1340 numtmp = MAX2(numtmp, LOG_TMP);
1341 }
1342 if (OPCS(DP4)) {
1343 newlen += DP4_GROW * OPCS(DP4);
1344 numtmp = MAX2(numtmp, DOTP_TMP);
1345 }
1346 if (OPCS(DP3)) {
1347 newlen += DP3_GROW * OPCS(DP3);
1348 numtmp = MAX2(numtmp, DOTP_TMP);
1349 }
1350 if (OPCS(DPH)) {
1351 newlen += DPH_GROW * OPCS(DPH);
1352 numtmp = MAX2(numtmp, DOTP_TMP);
1353 }
1354 if (OPCS(DP2)) {
1355 newlen += DP2_GROW * OPCS(DP2);
1356 numtmp = MAX2(numtmp, DOTP_TMP);
1357 }
1358 if (OPCS(DP2A)) {
1359 newlen += DP2A_GROW * OPCS(DP2A);
1360 numtmp = MAX2(numtmp, DOTP_TMP);
1361 }
1362
1363 /* specifically don't include two_side_colors temps in the count: */
1364 ctx.numtmp = numtmp;
1365
1366 if (ctx.two_side_colors) {
1367 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1368 /* note: we permanently consume temp regs, re-writing references
1369 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1370 * instruction that selects which varying to use):
1371 */
1372 numtmp += ctx.two_side_colors;
1373 }
1374
1375 newlen += 2 * numtmp;
1376 newlen += 5; /* immediate */
1377
1378 newtoks = tgsi_alloc_tokens(newlen);
1379 if (!newtoks)
1380 return NULL;
1381
1382 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1383
1384 tgsi_scan_shader(newtoks, info);
1385
1386 #if 0 /* debug */
1387 _debug_printf("AFTER:");
1388 tgsi_dump(newtoks, 0);
1389 #endif
1390
1391 return newtoks;
1392 }