src/gallium/drivers/freedreno/freedreno_lowering.c

   /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
   2
   3 /*
   4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the next
  14  * paragraph) shall be included in all copies or substantial portions of the
  15  * Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23  * SOFTWARE.
  24  *
  25  * Authors:
  26  *    Rob Clark <robclark@freedesktop.org>
  27  */
  28
  29 #include "tgsi/tgsi_transform.h"
  30 #include "tgsi/tgsi_scan.h"
  31 #include "tgsi/tgsi_dump.h"
  32
  33 #include "util/u_debug.h"
  34 #include "util/u_math.h"
  35
  36 #include "freedreno_lowering.h"
  37
  38 struct fd_lowering_context {
  39         struct tgsi_transform_context base;
  40         const struct fd_lowering_config *config;
  41         struct tgsi_shader_info *info;
  42         unsigned two_side_colors;
  43         unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
  44         unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
  45         int face_idx;
  46         unsigned numtmp;
  47         struct {
  48                 struct tgsi_full_src_register src;
  49                 struct tgsi_full_dst_register dst;
  50         } tmp[2];
  51 #define A 0
  52 #define B 1
  53         struct tgsi_full_src_register imm;
  54         int emitted_decls;
  55         unsigned saturate;
  56 };
  57
  58 static inline struct fd_lowering_context *
  59 fd_lowering_context(struct tgsi_transform_context *tctx)
  60 {
  61         return (struct fd_lowering_context *)tctx;
  62 }
  63
  64 /*
  65  * Utility helpers:
  66  */
  67
  68 static void
  69 reg_dst(struct tgsi_full_dst_register *dst,
  70         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
  71 {
  72         *dst = *orig_dst;
  73         dst->Register.WriteMask &= wrmask;
  74         assert(dst->Register.WriteMask);
  75 }
  76
  77 static inline void
  78 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
  79 {
  80         swiz[0] = src->SwizzleX;
  81         swiz[1] = src->SwizzleY;
  82         swiz[2] = src->SwizzleZ;
  83         swiz[3] = src->SwizzleW;
  84 }
  85
  86 static void
  87 reg_src(struct tgsi_full_src_register *src,
  88         const struct tgsi_full_src_register *orig_src,
  89         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
  90 {
  91         unsigned swiz[4];
  92         get_swiz(swiz, &orig_src->Register);
  93         *src = *orig_src;
  94         src->Register.SwizzleX = swiz[sx];
  95         src->Register.SwizzleY = swiz[sy];
  96         src->Register.SwizzleZ = swiz[sz];
  97         src->Register.SwizzleW = swiz[sw];
  98 }
  99
 100 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
 101 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
 102                 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
 103
 104 /*
 105  * if (dst.x aliases src.x) {
 106  *   MOV tmpA.x, src.x
 107  *   src = tmpA
 108  * }
 109  * COS dst.x, src.x
 110  * SIN dst.y, src.x
 111  * MOV dst.zw, imm{0.0, 1.0}
 112  */
 113 static bool
 114 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
 115         const struct tgsi_full_src_register *src, unsigned src_mask)
 116 {
 117         if ((dst->Register.File == src->Register.File) &&
 118                         (dst->Register.Index == src->Register.Index)) {
 119                 unsigned i, actual_mask = 0;
 120                 unsigned swiz[4];
 121                 get_swiz(swiz, &src->Register);
 122                 for (i = 0; i < 4; i++)
 123                         if (src_mask & (1 << i))
 124                                 actual_mask |= (1 << swiz[i]);
 125                 if (actual_mask & dst_mask)
 126                         return true;
 127         }
 128         return false;
 129 }
 130
 131 static void
 132 create_mov(struct tgsi_transform_context *tctx,
 133         const struct tgsi_full_dst_register *dst,
 134         const struct tgsi_full_src_register *src,
 135         unsigned mask, unsigned saturate)
 136 {
 137         struct tgsi_full_instruction new_inst;
 138
 139         new_inst = tgsi_default_full_instruction();
 140         new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 141         new_inst.Instruction.Saturate = saturate;
 142         new_inst.Instruction.NumDstRegs = 1;
 143         reg_dst(&new_inst.Dst[0], dst, mask);
 144         new_inst.Instruction.NumSrcRegs = 1;
 145         reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
 146         tctx->emit_instruction(tctx, &new_inst);
 147 }
 148
 149 /* to help calculate # of tgsi tokens for a lowering.. we assume
 150  * the worst case, ie. removed instructions don't have ADDR[] or
 151  * anything which increases the # of tokens per src/dst and the
 152  * inserted instructions do.
 153  *
 154  * OINST() - old instruction
 155  *    1         : instruction itself
 156  *    1         : dst
 157  *    1 * nargs : srcN
 158  *
 159  * NINST() - new instruction
 160  *    1         : instruction itself
 161  *    2         : dst
 162  *    2 * nargs : srcN
 163  */
 164
 165 #define OINST(nargs)  (1 + 1 + 1 * (nargs))
 166 #define NINST(nargs)  (1 + 2 + 2 * (nargs))
 167
 168 /*
 169  * Lowering Translators:
 170  */
 171
 172 /* DST - Distance Vector
 173  *   dst.x = 1.0
 174  *   dst.y = src0.y \times src1.y
 175  *   dst.z = src0.z
 176  *   dst.w = src1.w
 177  *
 178  * ; note: could be more clever and use just a single temp
 179  * ;       if I was clever enough to re-write the swizzles.
 180  * ; needs: 2 tmp, imm{1.0}
 181  * if (dst.y aliases src0.z) {
 182  *   MOV tmpA.yz, src0.yz
 183  *   src0 = tmpA
 184  * }
 185  * if (dst.yz aliases src1.w) {
 186  *   MOV tmpB.yw, src1.yw
 187  *   src1 = tmpB
 188  * }
 189  * MUL dst.y, src0.y, src1.y
 190  * MOV dst.z, src0.z
 191  * MOV dst.w, src1.w
 192  * MOV dst.x, imm{1.0}
 193  */
 194 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
 195                 NINST(1) + NINST(1) - OINST(2))
 196 #define DST_TMP  2
 197 static void
 198 transform_dst(struct tgsi_transform_context *tctx,
 199                 struct tgsi_full_instruction *inst)
 200 {
 201         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 202         struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 203         struct tgsi_full_src_register *src0 = &inst->Src[0];
 204         struct tgsi_full_src_register *src1 = &inst->Src[1];
 205         struct tgsi_full_instruction new_inst;
 206
 207         if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
 208                 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
 209                 src0 = &ctx->tmp[A].src;
 210         }
 211
 212         if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
 213                 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
 214                 src1 = &ctx->tmp[B].src;
 215         }
 216
 217         if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 218                 /* MUL dst.y, src0.y, src1.y */
 219                 new_inst = tgsi_default_full_instruction();
 220                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 221                 new_inst.Instruction.NumDstRegs = 1;
 222                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 223                 new_inst.Instruction.NumSrcRegs = 2;
 224                 reg_src(&new_inst.Src[0], src0, SWIZ(_,Y,_,_));
 225                 reg_src(&new_inst.Src[1], src1, SWIZ(_,Y,_,_));
 226                 tctx->emit_instruction(tctx, &new_inst);
 227         }
 228
 229         if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 230                 /* MOV dst.z, src0.z */
 231                 new_inst = tgsi_default_full_instruction();
 232                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 233                 new_inst.Instruction.NumDstRegs = 1;
 234                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 235                 new_inst.Instruction.NumSrcRegs = 1;
 236                 reg_src(&new_inst.Src[0], src0, SWIZ(_,_,Z,_));
 237                 tctx->emit_instruction(tctx, &new_inst);
 238         }
 239
 240         if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 241                 /* MOV dst.w, src1.w */
 242                 new_inst = tgsi_default_full_instruction();
 243                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 244                 new_inst.Instruction.NumDstRegs = 1;
 245                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 246                 new_inst.Instruction.NumSrcRegs = 1;
 247                 reg_src(&new_inst.Src[0], src1, SWIZ(_,_,_,W));
 248                 tctx->emit_instruction(tctx, &new_inst);
 249         }
 250
 251         if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 252                 /* MOV dst.x, imm{1.0} */
 253                 new_inst = tgsi_default_full_instruction();
 254                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 255                 new_inst.Instruction.NumDstRegs = 1;
 256                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 257                 new_inst.Instruction.NumSrcRegs = 1;
 258                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,_));
 259                 tctx->emit_instruction(tctx, &new_inst);
 260         }
 261 }
 262
 263 /* XPD - Cross Product
 264  *   dst.x = src0.y \times src1.z - src1.y \times src0.z
 265  *   dst.y = src0.z \times src1.x - src1.z \times src0.x
 266  *   dst.z = src0.x \times src1.y - src1.x \times src0.y
 267  *   dst.w = 1.0
 268  *
 269  * ; needs: 2 tmp, imm{1.0}
 270  * MUL tmpA.xyz, src0.yzx, src1.zxy
 271  * MUL tmpB.xyz, src1.yzx, src0.zxy
 272  * SUB dst.xyz, tmpA.xyz, tmpB.xyz
 273  * MOV dst.w, imm{1.0}
 274  */
 275 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
 276 #define XPD_TMP  2
 277 static void
 278 transform_xpd(struct tgsi_transform_context *tctx,
 279                 struct tgsi_full_instruction *inst)
 280 {
 281         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 282         struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 283         struct tgsi_full_src_register *src0 = &inst->Src[0];
 284         struct tgsi_full_src_register *src1 = &inst->Src[1];
 285         struct tgsi_full_instruction new_inst;
 286
 287         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
 288                 /* MUL tmpA.xyz, src0.yzx, src1.zxy */
 289                 new_inst = tgsi_default_full_instruction();
 290                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 291                 new_inst.Instruction.NumDstRegs = 1;
 292                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
 293                 new_inst.Instruction.NumSrcRegs = 2;
 294                 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Z,X,_));
 295                 reg_src(&new_inst.Src[1], src1, SWIZ(Z,X,Y,_));
 296                 tctx->emit_instruction(tctx, &new_inst);
 297
 298                 /* MUL tmpB.xyz, src1.yzx, src0.zxy */
 299                 new_inst = tgsi_default_full_instruction();
 300                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 301                 new_inst.Instruction.NumDstRegs = 1;
 302                 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
 303                 new_inst.Instruction.NumSrcRegs = 2;
 304                 reg_src(&new_inst.Src[0], src1, SWIZ(Y,Z,X,_));
 305                 reg_src(&new_inst.Src[1], src0, SWIZ(Z,X,Y,_));
 306                 tctx->emit_instruction(tctx, &new_inst);
 307
 308                 /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
 309                 new_inst = tgsi_default_full_instruction();
 310                 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 311                 new_inst.Instruction.NumDstRegs = 1;
 312                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
 313                 new_inst.Instruction.NumSrcRegs = 2;
 314                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,_));
 315                 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,_));
 316                 tctx->emit_instruction(tctx, &new_inst);
 317         }
 318
 319         if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 320                 /* MOV dst.w, imm{1.0} */
 321                 new_inst = tgsi_default_full_instruction();
 322                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 323                 new_inst.Instruction.NumDstRegs = 1;
 324                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 325                 new_inst.Instruction.NumSrcRegs = 1;
 326                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
 327                 tctx->emit_instruction(tctx, &new_inst);
 328         }
 329 }
 330
 331 /* SCS - Sine Cosine
 332  *   dst.x = \cos{src.x}
 333  *   dst.y = \sin{src.x}
 334  *   dst.z = 0.0
 335  *   dst.w = 1.0
 336  *
 337  * ; needs: 1 tmp, imm{0.0, 1.0}
 338  * if (dst.x aliases src.x) {
 339  *   MOV tmpA.x, src.x
 340  *   src = tmpA
 341  * }
 342  * COS dst.x, src.x
 343  * SIN dst.y, src.x
 344  * MOV dst.zw, imm{0.0, 1.0}
 345  */
 346 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
 347 #define SCS_TMP  1
 348 static void
 349 transform_scs(struct tgsi_transform_context *tctx,
 350                 struct tgsi_full_instruction *inst)
 351 {
 352         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 353         struct tgsi_full_dst_register *dst = &inst->Dst[0];
 354         struct tgsi_full_src_register *src = &inst->Src[0];
 355         struct tgsi_full_instruction new_inst;
 356
 357         if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
 358                 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
 359                 src = &ctx->tmp[A].src;
 360         }
 361
 362         if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 363                 /* COS dst.x, src.x */
 364                 new_inst = tgsi_default_full_instruction();
 365                 new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
 366                 new_inst.Instruction.NumDstRegs = 1;
 367                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 368                 new_inst.Instruction.NumSrcRegs = 1;
 369                 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
 370                 tctx->emit_instruction(tctx, &new_inst);
 371         }
 372
 373         if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 374                 /* SIN dst.y, src.x */
 375                 new_inst = tgsi_default_full_instruction();
 376                 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
 377                 new_inst.Instruction.NumDstRegs = 1;
 378                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 379                 new_inst.Instruction.NumSrcRegs = 1;
 380                 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
 381                 tctx->emit_instruction(tctx, &new_inst);
 382         }
 383
 384         if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
 385                 /* MOV dst.zw, imm{0.0, 1.0} */
 386                 new_inst = tgsi_default_full_instruction();
 387                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 388                 new_inst.Instruction.NumDstRegs = 1;
 389                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
 390                 new_inst.Instruction.NumSrcRegs = 1;
 391                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,X,Y));
 392                 tctx->emit_instruction(tctx, &new_inst);
 393         }
 394 }
 395
 396 /* LRP - Linear Interpolate
 397  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
 398  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
 399  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
 400  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
 401  *
 402  * ; needs: 2 tmp, imm{1.0}
 403  * MUL tmpA, src0, src1
 404  * SUB tmpB, imm{1.0}, src0
 405  * MUL tmpB, tmpB, src2
 406  * ADD dst, tmpA, tmpB
 407  */
 408 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
 409 #define LRP_TMP  2
 410 static void
 411 transform_lrp(struct tgsi_transform_context *tctx,
 412                 struct tgsi_full_instruction *inst)
 413 {
 414         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 415         struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 416         struct tgsi_full_src_register *src0 = &inst->Src[0];
 417         struct tgsi_full_src_register *src1 = &inst->Src[1];
 418         struct tgsi_full_src_register *src2 = &inst->Src[2];
 419         struct tgsi_full_instruction new_inst;
 420
 421         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 422                 /* MUL tmpA, src0, src1 */
 423                 new_inst = tgsi_default_full_instruction();
 424                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 425                 new_inst.Instruction.NumDstRegs = 1;
 426                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 427                 new_inst.Instruction.NumSrcRegs = 2;
 428                 reg_src(&new_inst.Src[0], src0, SWIZ(X,Y,Z,W));
 429                 reg_src(&new_inst.Src[1], src1, SWIZ(X,Y,Z,W));
 430                 tctx->emit_instruction(tctx, &new_inst);
 431
 432                 /* SUB tmpB, imm{1.0}, src0 */
 433                 new_inst = tgsi_default_full_instruction();
 434                 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 435                 new_inst.Instruction.NumDstRegs = 1;
 436                 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
 437                 new_inst.Instruction.NumSrcRegs = 2;
 438                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,Y,Y,Y));
 439                 reg_src(&new_inst.Src[1], src0, SWIZ(X,Y,Z,W));
 440                 tctx->emit_instruction(tctx, &new_inst);
 441
 442                 /* MUL tmpB, tmpB, src2 */
 443                 new_inst = tgsi_default_full_instruction();
 444                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 445                 new_inst.Instruction.NumDstRegs = 1;
 446                 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
 447                 new_inst.Instruction.NumSrcRegs = 2;
 448                 reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
 449                 reg_src(&new_inst.Src[1], src2, SWIZ(X,Y,Z,W));
 450                 tctx->emit_instruction(tctx, &new_inst);
 451
 452                 /* ADD dst, tmpA, tmpB */
 453                 new_inst = tgsi_default_full_instruction();
 454                 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 455                 new_inst.Instruction.NumDstRegs = 1;
 456                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 457                 new_inst.Instruction.NumSrcRegs = 2;
 458                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
 459                 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,Y,Z,W));
 460                 tctx->emit_instruction(tctx, &new_inst);
 461         }
 462 }
 463
 464 /* FRC - Fraction
 465  *  dst.x = src.x - \lfloor src.x\rfloor
 466  *  dst.y = src.y - \lfloor src.y\rfloor
 467  *  dst.z = src.z - \lfloor src.z\rfloor
 468  *  dst.w = src.w - \lfloor src.w\rfloor
 469  *
 470  * ; needs: 1 tmp
 471  * FLR tmpA, src
 472  * SUB dst, src, tmpA
 473  */
 474 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
 475 #define FRC_TMP  1
 476 static void
 477 transform_frc(struct tgsi_transform_context *tctx,
 478                 struct tgsi_full_instruction *inst)
 479 {
 480         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 481         struct tgsi_full_dst_register *dst = &inst->Dst[0];
 482         struct tgsi_full_src_register *src = &inst->Src[0];
 483         struct tgsi_full_instruction new_inst;
 484
 485         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 486                 /* FLR tmpA, src */
 487                 new_inst = tgsi_default_full_instruction();
 488                 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 489                 new_inst.Instruction.NumDstRegs = 1;
 490                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 491                 new_inst.Instruction.NumSrcRegs = 1;
 492                 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
 493                 tctx->emit_instruction(tctx, &new_inst);
 494
 495                 /* SUB dst, src, tmpA */
 496                 new_inst = tgsi_default_full_instruction();
 497                 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 498                 new_inst.Instruction.NumDstRegs = 1;
 499                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 500                 new_inst.Instruction.NumSrcRegs = 2;
 501                 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,Z,W));
 502                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
 503                 tctx->emit_instruction(tctx, &new_inst);
 504         }
 505 }
 506
 507 /* POW - Power
 508  *  dst.x = src0.x^{src1.x}
 509  *  dst.y = src0.x^{src1.x}
 510  *  dst.z = src0.x^{src1.x}
 511  *  dst.w = src0.x^{src1.x}
 512  *
 513  * ; needs: 1 tmp
 514  * LG2 tmpA.x, src0.x
 515  * MUL tmpA.x, src1.x, tmpA.x
 516  * EX2 dst, tmpA.x
 517  */
 518 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
 519 #define POW_TMP  1
 520 static void
 521 transform_pow(struct tgsi_transform_context *tctx,
 522                 struct tgsi_full_instruction *inst)
 523 {
 524         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 525         struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 526         struct tgsi_full_src_register *src0 = &inst->Src[0];
 527         struct tgsi_full_src_register *src1 = &inst->Src[1];
 528         struct tgsi_full_instruction new_inst;
 529
 530         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 531                 /* LG2 tmpA.x, src0.x */
 532                 new_inst = tgsi_default_full_instruction();
 533                 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 534                 new_inst.Instruction.NumDstRegs = 1;
 535                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 536                 new_inst.Instruction.NumSrcRegs = 1;
 537                 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
 538                 tctx->emit_instruction(tctx, &new_inst);
 539
 540                 /* MUL tmpA.x, src1.x, tmpA.x */
 541                 new_inst = tgsi_default_full_instruction();
 542                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 543                 new_inst.Instruction.NumDstRegs = 1;
 544                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 545                 new_inst.Instruction.NumSrcRegs = 2;
 546                 reg_src(&new_inst.Src[0], src1, SWIZ(X,_,_,_));
 547                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,_,_,_));
 548                 tctx->emit_instruction(tctx, &new_inst);
 549
 550                 /* EX2 dst, tmpA.x */
 551                 new_inst = tgsi_default_full_instruction();
 552                 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 553                 new_inst.Instruction.NumDstRegs = 1;
 554                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 555                 new_inst.Instruction.NumSrcRegs = 1;
 556                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
 557                 tctx->emit_instruction(tctx, &new_inst);
 558         }
 559 }
 560
 561 /* LIT - Light Coefficients
 562  *  dst.x = 1.0
 563  *  dst.y = max(src.x, 0.0)
 564  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 565  *  dst.w = 1.0
 566  *
 567  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
 568  * MAX tmpA.xy, src.xy, imm{0.0}
 569  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
 570  * LG2 tmpA.y, tmpA.y
 571  * MUL tmpA.y, tmpA.z, tmpA.y
 572  * EX2 tmpA.y, tmpA.y
 573  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
 574  * MOV dst.yz, tmpA.xy
 575  * MOV dst.xw, imm{1.0}
 576  */
 577 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
 578                 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
 579 #define LIT_TMP  1
 580 static void
 581 transform_lit(struct tgsi_transform_context *tctx,
 582                 struct tgsi_full_instruction *inst)
 583 {
 584         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 585         struct tgsi_full_dst_register *dst = &inst->Dst[0];
 586         struct tgsi_full_src_register *src = &inst->Src[0];
 587         struct tgsi_full_instruction new_inst;
 588
 589         if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
 590                 /* MAX tmpA.xy, src.xy, imm{0.0} */
 591                 new_inst = tgsi_default_full_instruction();
 592                 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
 593                 new_inst.Instruction.NumDstRegs = 1;
 594                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
 595                 new_inst.Instruction.NumSrcRegs = 2;
 596                 reg_src(&new_inst.Src[0], src, SWIZ(X,Y,_,_));
 597                 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X,X,_,_));
 598                 tctx->emit_instruction(tctx, &new_inst);
 599
 600                 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
 601                 new_inst = tgsi_default_full_instruction();
 602                 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
 603                 new_inst.Instruction.NumDstRegs = 1;
 604                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 605                 new_inst.Instruction.NumSrcRegs = 3;
 606                 reg_src(&new_inst.Src[0], src, SWIZ(_,_,W,_));
 607                 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_,_,Z,_));
 608                 new_inst.Src[1].Register.Negate = true;
 609                 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,_,Z,_));
 610                 tctx->emit_instruction(tctx, &new_inst);
 611
 612                 /* LG2 tmpA.y, tmpA.y */
 613                 new_inst = tgsi_default_full_instruction();
 614                 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 615                 new_inst.Instruction.NumDstRegs = 1;
 616                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 617                 new_inst.Instruction.NumSrcRegs = 1;
 618                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
 619                 tctx->emit_instruction(tctx, &new_inst);
 620
 621                 /* MUL tmpA.y, tmpA.z, tmpA.y */
 622                 new_inst = tgsi_default_full_instruction();
 623                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 624                 new_inst.Instruction.NumDstRegs = 1;
 625                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 626                 new_inst.Instruction.NumSrcRegs = 2;
 627                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
 628                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
 629                 tctx->emit_instruction(tctx, &new_inst);
 630
 631                 /* EX2 tmpA.y, tmpA.y */
 632                 new_inst = tgsi_default_full_instruction();
 633                 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 634                 new_inst.Instruction.NumDstRegs = 1;
 635                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 636                 new_inst.Instruction.NumSrcRegs = 1;
 637                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
 638                 tctx->emit_instruction(tctx, &new_inst);
 639
 640                 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
 641                 new_inst = tgsi_default_full_instruction();
 642                 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
 643                 new_inst.Instruction.NumDstRegs = 1;
 644                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 645                 new_inst.Instruction.NumSrcRegs = 3;
 646                 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
 647                 new_inst.Src[0].Register.Negate = true;
 648                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Y,_,_));
 649                 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_,X,_,_));
 650                 tctx->emit_instruction(tctx, &new_inst);
 651
 652                 /* MOV dst.yz, tmpA.xy */
 653                 new_inst = tgsi_default_full_instruction();
 654                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 655                 new_inst.Instruction.NumDstRegs = 1;
 656                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
 657                 new_inst.Instruction.NumSrcRegs = 1;
 658                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,Y,_));
 659                 tctx->emit_instruction(tctx, &new_inst);
 660         }
 661
 662         if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
 663                 /* MOV dst.xw, imm{1.0} */
 664                 new_inst = tgsi_default_full_instruction();
 665                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 666                 new_inst.Instruction.NumDstRegs = 1;
 667                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
 668                 new_inst.Instruction.NumSrcRegs = 1;
 669                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y,_,_,Y));
 670                 tctx->emit_instruction(tctx, &new_inst);
 671         }
 672 }
 673
 674 /* EXP - Approximate Exponential Base 2
 675  *  dst.x = 2^{\lfloor src.x\rfloor}
 676  *  dst.y = src.x - \lfloor src.x\rfloor
 677  *  dst.z = 2^{src.x}
 678  *  dst.w = 1.0
 679  *
 680  * ; needs: 1 tmp, imm{1.0}
 681  * FLR tmpA.x, src.x
 682  * EX2 tmpA.y, src.x
 683  * SUB dst.y, src.x, tmpA.x
 684  * EX2 dst.x, tmpA.x
 685  * MOV dst.z, tmpA.y
 686  * MOV dst.w, imm{1.0}
 687  */
 688 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
 689                 NINST(1)+ NINST(1) - OINST(1))
 690 #define EXP_TMP  1
 691 static void
 692 transform_exp(struct tgsi_transform_context *tctx,
 693                 struct tgsi_full_instruction *inst)
 694 {
 695         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 696         struct tgsi_full_dst_register *dst = &inst->Dst[0];
 697         struct tgsi_full_src_register *src = &inst->Src[0];
 698         struct tgsi_full_instruction new_inst;
 699
 700         if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 701                 /* FLR tmpA.x, src.x */
 702                 new_inst = tgsi_default_full_instruction();
 703                 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 704                 new_inst.Instruction.NumDstRegs = 1;
 705                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 706                 new_inst.Instruction.NumSrcRegs = 1;
 707                 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
 708                 tctx->emit_instruction(tctx, &new_inst);
 709         }
 710
 711         if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 712                 /* EX2 tmpA.y, src.x */
 713                 new_inst = tgsi_default_full_instruction();
 714                 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 715                 new_inst.Instruction.NumDstRegs = 1;
 716                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 717                 new_inst.Instruction.NumSrcRegs = 1;
 718                 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
 719                 tctx->emit_instruction(tctx, &new_inst);
 720         }
 721
 722         if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 723                 /* SUB dst.y, src.x, tmpA.x */
 724                 new_inst = tgsi_default_full_instruction();
 725                 new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 726                 new_inst.Instruction.NumDstRegs = 1;
 727                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 728                 new_inst.Instruction.NumSrcRegs = 2;
 729                 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
 730                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,X,_,_));
 731                 tctx->emit_instruction(tctx, &new_inst);
 732         }
 733
 734         if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 735                 /* EX2 dst.x, tmpA.x */
 736                 new_inst = tgsi_default_full_instruction();
 737                 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 738                 new_inst.Instruction.NumDstRegs = 1;
 739                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 740                 new_inst.Instruction.NumSrcRegs = 1;
 741                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,_,_,_));
 742                 tctx->emit_instruction(tctx, &new_inst);
 743         }
 744
 745         if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 746                 /* MOV dst.z, tmpA.y */
 747                 new_inst = tgsi_default_full_instruction();
 748                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 749                 new_inst.Instruction.NumDstRegs = 1;
 750                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 751                 new_inst.Instruction.NumSrcRegs = 1;
 752                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,_,Y,_));
 753                 tctx->emit_instruction(tctx, &new_inst);
 754         }
 755
 756         if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 757                 /* MOV dst.w, imm{1.0} */
 758                 new_inst = tgsi_default_full_instruction();
 759                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 760                 new_inst.Instruction.NumDstRegs = 1;
 761                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 762                 new_inst.Instruction.NumSrcRegs = 1;
 763                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
 764                 tctx->emit_instruction(tctx, &new_inst);
 765         }
 766 }
 767
 768 /* LOG - Approximate Logarithm Base 2
 769  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 770  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 771  *  dst.z = \log_2{|src.x|}
 772  *  dst.w = 1.0
 773  *
 774  * ; needs: 1 tmp, imm{1.0}
 775  * LG2 tmpA.x, |src.x|
 776  * FLR tmpA.y, tmpA.x
 777  * EX2 tmpA.z, tmpA.y
 778  * RCP tmpA.z, tmpA.z
 779  * MUL dst.y, |src.x|, tmpA.z
 780  * MOV dst.xz, tmpA.yx
 781  * MOV dst.w, imm{1.0}
 782  */
 783 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
 784                 NINST(2) + NINST(1) + NINST(1) - OINST(1))
 785 #define LOG_TMP  1
 786 static void
 787 transform_log(struct tgsi_transform_context *tctx,
 788                 struct tgsi_full_instruction *inst)
 789 {
 790         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 791         struct tgsi_full_dst_register *dst = &inst->Dst[0];
 792         struct tgsi_full_src_register *src = &inst->Src[0];
 793         struct tgsi_full_instruction new_inst;
 794
 795         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
 796                 /* LG2 tmpA.x, |src.x| */
 797                 new_inst = tgsi_default_full_instruction();
 798                 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 799                 new_inst.Instruction.NumDstRegs = 1;
 800                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 801                 new_inst.Instruction.NumSrcRegs = 1;
 802                 reg_src(&new_inst.Src[0], src, SWIZ(X,_,_,_));
 803                 new_inst.Src[0].Register.Absolute = true;
 804                 tctx->emit_instruction(tctx, &new_inst);
 805         }
 806
 807         if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 808                 /* FLR tmpA.y, tmpA.x */
 809                 new_inst = tgsi_default_full_instruction();
 810                 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 811                 new_inst.Instruction.NumDstRegs = 1;
 812                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 813                 new_inst.Instruction.NumSrcRegs = 1;
 814                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_,X,_,_));
 815                 tctx->emit_instruction(tctx, &new_inst);
 816         }
 817
 818         if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 819                 /* EX2 tmpA.z, tmpA.y */
 820                 new_inst = tgsi_default_full_instruction();
 821                 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 822                 new_inst.Instruction.NumDstRegs = 1;
 823                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 824                 new_inst.Instruction.NumSrcRegs = 1;
 825                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,_,_));
 826                 tctx->emit_instruction(tctx, &new_inst);
 827
 828                 /* RCP tmpA.z, tmpA.z */
 829                 new_inst = tgsi_default_full_instruction();
 830                 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
 831                 new_inst.Instruction.NumDstRegs = 1;
 832                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 833                 new_inst.Instruction.NumSrcRegs = 1;
 834                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z,_,_,_));
 835                 tctx->emit_instruction(tctx, &new_inst);
 836
 837                 /* MUL dst.y, |src.x|, tmpA.z */
 838                 new_inst = tgsi_default_full_instruction();
 839                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 840                 new_inst.Instruction.NumDstRegs = 1;
 841                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 842                 new_inst.Instruction.NumSrcRegs = 2;
 843                 reg_src(&new_inst.Src[0], src, SWIZ(_,X,_,_));
 844                 new_inst.Src[0].Register.Absolute = true;
 845                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_,Z,_,_));
 846                 tctx->emit_instruction(tctx, &new_inst);
 847         }
 848
 849         if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
 850                 /* MOV dst.xz, tmpA.yx */
 851                 new_inst = tgsi_default_full_instruction();
 852                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 853                 new_inst.Instruction.NumDstRegs = 1;
 854                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
 855                 new_inst.Instruction.NumSrcRegs = 1;
 856                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y,_,X,_));
 857                 tctx->emit_instruction(tctx, &new_inst);
 858         }
 859
 860         if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 861                 /* MOV dst.w, imm{1.0} */
 862                 new_inst = tgsi_default_full_instruction();
 863                 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 864                 new_inst.Instruction.NumDstRegs = 1;
 865                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 866                 new_inst.Instruction.NumSrcRegs = 1;
 867                 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_,_,_,Y));
 868                 tctx->emit_instruction(tctx, &new_inst);
 869         }
 870 }
 871
 872 /* DP4 - 4-component Dot Product
 873  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 874  *
 875  * DP3 - 3-component Dot Product
 876  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 877  *
 878  * DPH - Homogeneous Dot Product
 879  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 880  *
 881  * DP2 - 2-component Dot Product
 882  *   dst = src0.x \times src1.x + src0.y \times src1.y
 883  *
 884  * DP2A - 2-component Dot Product And Add
 885  *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
 886  *
 887  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
 888  * operations, which is what you'd prefer for a ISA that is natively
 889  * scalar.  Probably a native vector ISA would at least already have
 890  * DP4/DP3 instructions, but perhaps there is room for an alternative
 891  * translation for DPH/DP2/DP2A using vector instructions.
 892  *
 893  * ; needs: 1 tmp
 894  * MUL tmpA.x, src0.x, src1.x
 895  * MAD tmpA.x, src0.y, src1.y, tmpA.x
 896  * if (DPH || DP3 || DP4) {
 897  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
 898  *   if (DPH) {
 899  *     ADD tmpA.x, src1.w, tmpA.x
 900  *   } else if (DP4) {
 901  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
 902  *   }
 903  * } else if (DP2A) {
 904  *   ADD tmpA.x, src2.x, tmpA.x
 905  * }
 906  * ; fixup last instruction to replicate into dst
 907  */
 908 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
 909 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
 910 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
 911 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
 912 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
 913 #define DOTP_TMP  1
 914 static void
 915 transform_dotp(struct tgsi_transform_context *tctx,
 916                 struct tgsi_full_instruction *inst)
 917 {
 918         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
 919         struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 920         struct tgsi_full_src_register *src0 = &inst->Src[0];
 921         struct tgsi_full_src_register *src1 = &inst->Src[1];
 922         struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
 923         struct tgsi_full_instruction new_inst;
 924         unsigned opcode = inst->Instruction.Opcode;
 925
 926         /* NOTE: any potential last instruction must replicate src on all
 927          * components (since it could be re-written to write to final dst)
 928          */
 929
 930         if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 931                 /* MUL tmpA.x, src0.x, src1.x */
 932                 new_inst = tgsi_default_full_instruction();
 933                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 934                 new_inst.Instruction.NumDstRegs = 1;
 935                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 936                 new_inst.Instruction.NumSrcRegs = 2;
 937                 reg_src(&new_inst.Src[0], src0, SWIZ(X,_,_,_));
 938                 reg_src(&new_inst.Src[1], src1, SWIZ(X,_,_,_));
 939                 tctx->emit_instruction(tctx, &new_inst);
 940
 941                 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
 942                 new_inst = tgsi_default_full_instruction();
 943                 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 944                 new_inst.Instruction.NumDstRegs = 1;
 945                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 946                 new_inst.Instruction.NumSrcRegs = 3;
 947                 reg_src(&new_inst.Src[0], src0, SWIZ(Y,Y,Y,Y));
 948                 reg_src(&new_inst.Src[1], src1, SWIZ(Y,Y,Y,Y));
 949                 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
 950
 951                 if ((opcode == TGSI_OPCODE_DPH) ||
 952                                 (opcode == TGSI_OPCODE_DP3) ||
 953                                 (opcode == TGSI_OPCODE_DP4)) {
 954                         tctx->emit_instruction(tctx, &new_inst);
 955
 956                         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
 957                         new_inst = tgsi_default_full_instruction();
 958                         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 959                         new_inst.Instruction.NumDstRegs = 1;
 960                         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 961                         new_inst.Instruction.NumSrcRegs = 3;
 962                         reg_src(&new_inst.Src[0], src0, SWIZ(Z,Z,Z,Z));
 963                         reg_src(&new_inst.Src[1], src1, SWIZ(Z,Z,Z,Z));
 964                         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
 965
 966                         if (opcode == TGSI_OPCODE_DPH) {
 967                                 tctx->emit_instruction(tctx, &new_inst);
 968
 969                                 /* ADD tmpA.x, src1.w, tmpA.x */
 970                                 new_inst = tgsi_default_full_instruction();
 971                                 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 972                                 new_inst.Instruction.NumDstRegs = 1;
 973                                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 974                                 new_inst.Instruction.NumSrcRegs = 2;
 975                                 reg_src(&new_inst.Src[0], src1, SWIZ(W,W,W,W));
 976                                 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
 977                         } else if (opcode == TGSI_OPCODE_DP4) {
 978                                 tctx->emit_instruction(tctx, &new_inst);
 979
 980                                 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
 981                                 new_inst = tgsi_default_full_instruction();
 982                                 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 983                                 new_inst.Instruction.NumDstRegs = 1;
 984                                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 985                                 new_inst.Instruction.NumSrcRegs = 3;
 986                                 reg_src(&new_inst.Src[0], src0, SWIZ(W,W,W,W));
 987                                 reg_src(&new_inst.Src[1], src1, SWIZ(W,W,W,W));
 988                                 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X,X,X,X));
 989                         }
 990                 } else if (opcode == TGSI_OPCODE_DP2A) {
 991                         tctx->emit_instruction(tctx, &new_inst);
 992
 993                         /* ADD tmpA.x, src2.x, tmpA.x */
 994                         new_inst = tgsi_default_full_instruction();
 995                         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 996                         new_inst.Instruction.NumDstRegs = 1;
 997                         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 998                         new_inst.Instruction.NumSrcRegs = 2;
 999                         reg_src(&new_inst.Src[0], src2, SWIZ(X,X,X,X));
1000                         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X,X,X,X));
1001                 }
1002
1003                 /* fixup last instruction to write to dst: */
1004                 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005
1006                 tctx->emit_instruction(tctx, &new_inst);
1007         }
1008 }
1009
1010 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1011  * in the case of TXP, the clamping must happen *after* projection, so
1012  * we need to lower TXP to TEX.
1013  *
1014  *   MOV tmpA, src0
1015  *   if (opc == TXP) {
1016  *     ; do perspective division manually before clamping:
1017  *     RCP tmpB, tmpA.w
1018  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1019  *     opc = TEX;
1020  *   }
1021  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1022  *   <opc> dst, tmpA, ...
1023  */
1024 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1025 #define SAMP_TMP  2
1026 static int
1027 transform_samp(struct tgsi_transform_context *tctx,
1028                 struct tgsi_full_instruction *inst)
1029 {
1030         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1031         struct tgsi_full_src_register *coord = &inst->Src[0];
1032         struct tgsi_full_src_register *samp;
1033         struct tgsi_full_instruction new_inst;
1034         /* mask is clamped coords, pmask is all coords (for projection): */
1035         unsigned mask = 0, pmask = 0, smask;
1036         unsigned opcode = inst->Instruction.Opcode;
1037
1038         if (opcode == TGSI_OPCODE_TXB2) {
1039                 samp = &inst->Src[2];
1040         } else {
1041                 samp = &inst->Src[1];
1042         }
1043
1044         /* convert sampler # to bitmask to test: */
1045         smask = 1 << samp->Register.Index;
1046
1047         /* check if we actually need to lower this one: */
1048         if (!(ctx->saturate & smask))
1049                 return -1;
1050
1051         /* figure out which coordinates need saturating:
1052          *   - RECT textures should not get saturated
1053          *   - array index coords should not get saturated
1054          */
1055         switch (inst->Texture.Texture) {
1056         case TGSI_TEXTURE_3D:
1057         case TGSI_TEXTURE_CUBE:
1058         case TGSI_TEXTURE_CUBE_ARRAY:
1059         case TGSI_TEXTURE_SHADOWCUBE:
1060         case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1061                 if (ctx->config->saturate_r & smask)
1062                         mask |= TGSI_WRITEMASK_Z;
1063                 pmask |= TGSI_WRITEMASK_Z;
1064                 /* fallthrough */
1065
1066         case TGSI_TEXTURE_2D:
1067         case TGSI_TEXTURE_2D_ARRAY:
1068         case TGSI_TEXTURE_SHADOW2D:
1069         case TGSI_TEXTURE_SHADOW2D_ARRAY:
1070         case TGSI_TEXTURE_2D_MSAA:
1071         case TGSI_TEXTURE_2D_ARRAY_MSAA:
1072                 if (ctx->config->saturate_t & smask)
1073                         mask |= TGSI_WRITEMASK_Y;
1074                 pmask |= TGSI_WRITEMASK_Y;
1075                 /* fallthrough */
1076
1077         case TGSI_TEXTURE_1D:
1078         case TGSI_TEXTURE_1D_ARRAY:
1079         case TGSI_TEXTURE_SHADOW1D:
1080         case TGSI_TEXTURE_SHADOW1D_ARRAY:
1081                 if (ctx->config->saturate_s & smask)
1082                         mask |= TGSI_WRITEMASK_X;
1083                 pmask |= TGSI_WRITEMASK_X;
1084                 break;
1085
1086         /* TODO: I think we should ignore these?
1087         case TGSI_TEXTURE_RECT:
1088         case TGSI_TEXTURE_SHADOWRECT:
1089         */
1090         }
1091
1092         /* sanity check.. driver could be asking to saturate a non-
1093          * existent coordinate component:
1094          */
1095         if (!mask)
1096                 return -1;
1097
1098         /* MOV tmpA, src0 */
1099         create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1100
1101         /* This is a bit sad.. we need to clamp *after* the coords
1102          * are projected, which means lowering TXP to TEX and doing
1103          * the projection ourself.  But since I haven't figured out
1104          * how to make the lowering code deliver an electric shock
1105          * to anyone using GL_CLAMP, we must do this instead:
1106          */
1107         if (opcode == TGSI_OPCODE_TXP) {
1108                 /* RCP tmpB.x tmpA.w */
1109                 new_inst = tgsi_default_full_instruction();
1110                 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1111                 new_inst.Instruction.NumDstRegs = 1;
1112                 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1113                 new_inst.Instruction.NumSrcRegs = 1;
1114                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W,_,_,_));
1115                 tctx->emit_instruction(tctx, &new_inst);
1116
1117                 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1118                 new_inst = tgsi_default_full_instruction();
1119                 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1120                 new_inst.Instruction.NumDstRegs = 1;
1121                 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1122                 new_inst.Instruction.NumSrcRegs = 2;
1123                 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
1124                 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,X,X,X));
1125                 tctx->emit_instruction(tctx, &new_inst);
1126
1127                 opcode = TGSI_OPCODE_TEX;
1128         }
1129
1130         /* MOV_SAT tmpA.<mask>, tmpA */
1131         create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
1132                         TGSI_SAT_ZERO_ONE);
1133
1134         /* modify the texture samp instruction to take fixed up coord: */
1135         new_inst = *inst;
1136         new_inst.Instruction.Opcode = opcode;
1137         new_inst.Src[0] = ctx->tmp[A].src;
1138         tctx->emit_instruction(tctx, &new_inst);
1139
1140         return 0;
1141 }
1142
1143 /* Two-sided color emulation:
1144  * For each COLOR input, create a corresponding BCOLOR input, plus
1145  * CMP instruction to select front or back color based on FACE
1146  */
1147 #define TWOSIDE_GROW(n)  (                       \
1148                         2 +         /* FACE */               \
1149                         ((n) * 2) + /* IN[] BCOLOR[n] */     \
1150                         ((n) * 1) + /* TEMP[] */             \
1151                         ((n) * NINST(3))   /* CMP instr */   \
1152                 )
1153
1154 static void
1155 emit_twoside(struct tgsi_transform_context *tctx)
1156 {
1157         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1158         struct tgsi_shader_info *info = ctx->info;
1159         struct tgsi_full_declaration decl;
1160         struct tgsi_full_instruction new_inst;
1161         unsigned inbase, tmpbase;
1162         int i;
1163
1164         inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1165         tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1166
1167         /* additional inputs for BCOLOR's */
1168         for (i = 0; i < ctx->two_side_colors; i++) {
1169                 decl = tgsi_default_full_declaration();
1170                 decl.Declaration.File = TGSI_FILE_INPUT;
1171                 decl.Declaration.Semantic = true;
1172                 decl.Range.First = decl.Range.Last = inbase + i;
1173                 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1174                 decl.Semantic.Index =
1175                         info->input_semantic_index[ctx->two_side_idx[i]];
1176                 tctx->emit_declaration(tctx, &decl);
1177         }
1178
1179         /* additional input for FACE */
1180         if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1181                 decl = tgsi_default_full_declaration();
1182                 decl.Declaration.File = TGSI_FILE_INPUT;
1183                 decl.Declaration.Semantic = true;
1184                 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1185                 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1186                 decl.Semantic.Index = 0;
1187                 tctx->emit_declaration(tctx, &decl);
1188
1189                 ctx->face_idx = decl.Range.First;
1190         }
1191
1192         /* additional temps for COLOR/BCOLOR selection: */
1193         for (i = 0; i < ctx->two_side_colors; i++) {
1194                 decl = tgsi_default_full_declaration();
1195                 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1196                 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1197                 tctx->emit_declaration(tctx, &decl);
1198         }
1199
1200         /* and finally additional instructions to select COLOR/BCOLOR: */
1201         for (i = 0; i < ctx->two_side_colors; i++) {
1202                 new_inst = tgsi_default_full_instruction();
1203                 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1204
1205                 new_inst.Instruction.NumDstRegs = 1;
1206                 new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1207                 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1208                 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1209
1210                 new_inst.Instruction.NumSrcRegs = 3;
1211                 new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1212                 new_inst.Src[0].Register.Index = ctx->face_idx;
1213                 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1214                 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1215                 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1216                 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1217                 new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1218                 new_inst.Src[1].Register.Index = inbase + i;
1219                 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1220                 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1221                 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1222                 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1223                 new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1224                 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1225                 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1226                 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1227                 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1228                 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1229
1230                 tctx->emit_instruction(tctx, &new_inst);
1231         }
1232 }
1233
1234 static void
1235 emit_decls(struct tgsi_transform_context *tctx)
1236 {
1237         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1238         struct tgsi_shader_info *info = ctx->info;
1239         struct tgsi_full_declaration decl;
1240         struct tgsi_full_immediate immed;
1241         unsigned tmpbase;
1242         int i;
1243
1244         tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1245
1246         ctx->color_base = tmpbase + ctx->numtmp;
1247
1248         /* declare immediate: */
1249         immed = tgsi_default_full_immediate();
1250         immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1251         immed.u[0].Float = 0.0;
1252         immed.u[1].Float = 1.0;
1253         immed.u[2].Float = 128.0;
1254         immed.u[3].Float = 0.0;
1255         tctx->emit_immediate(tctx, &immed);
1256
1257         ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1258         ctx->imm.Register.Index = info->immediate_count;
1259         ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1260         ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1261         ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1262         ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1263
1264         /* declare temp regs: */
1265         for (i = 0; i < ctx->numtmp; i++) {
1266                 decl = tgsi_default_full_declaration();
1267                 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1268                 decl.Range.First = decl.Range.Last = tmpbase + i;
1269                 tctx->emit_declaration(tctx, &decl);
1270
1271                 ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1272                 ctx->tmp[i].src.Register.Index = tmpbase + i;
1273                 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1274                 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1275                 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1276                 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1277
1278                 ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1279                 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1280                 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1281         }
1282
1283         if (ctx->two_side_colors)
1284                 emit_twoside(tctx);
1285 }
1286
1287 static void
1288 rename_color_inputs(struct fd_lowering_context *ctx,
1289                 struct tgsi_full_instruction *inst)
1290 {
1291         unsigned i, j;
1292         for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1293                 struct tgsi_src_register *src = &inst->Src[i].Register;
1294                 if (src->File == TGSI_FILE_INPUT) {
1295                         for (j = 0; j < ctx->two_side_colors; j++) {
1296                                 if (src->Index == ctx->two_side_idx[j]) {
1297                                         src->File = TGSI_FILE_TEMPORARY;
1298                                         src->Index = ctx->color_base + j;
1299                                         break;
1300                                 }
1301                         }
1302                 }
1303         }
1304
1305 }
1306
1307 static void
1308 transform_instr(struct tgsi_transform_context *tctx,
1309                 struct tgsi_full_instruction *inst)
1310 {
1311         struct fd_lowering_context *ctx = fd_lowering_context(tctx);
1312
1313         if (!ctx->emitted_decls) {
1314                 emit_decls(tctx);
1315                 ctx->emitted_decls = 1;
1316         }
1317
1318         /* if emulating two-sided-color, we need to re-write some
1319          * src registers:
1320          */
1321         if (ctx->two_side_colors)
1322                 rename_color_inputs(ctx, inst);
1323
1324         switch (inst->Instruction.Opcode) {
1325         case TGSI_OPCODE_DST:
1326                 if (!ctx->config->lower_DST)
1327                         goto skip;
1328                 transform_dst(tctx, inst);
1329                 break;
1330         case TGSI_OPCODE_XPD:
1331                 if (!ctx->config->lower_XPD)
1332                         goto skip;
1333                 transform_xpd(tctx, inst);
1334                 break;
1335         case TGSI_OPCODE_SCS:
1336                 if (!ctx->config->lower_SCS)
1337                         goto skip;
1338                 transform_scs(tctx, inst);
1339                 break;
1340         case TGSI_OPCODE_LRP:
1341                 if (!ctx->config->lower_LRP)
1342                         goto skip;
1343                 transform_lrp(tctx, inst);
1344                 break;
1345         case TGSI_OPCODE_FRC:
1346                 if (!ctx->config->lower_FRC)
1347                         goto skip;
1348                 transform_frc(tctx, inst);
1349                 break;
1350         case TGSI_OPCODE_POW:
1351                 if (!ctx->config->lower_POW)
1352                         goto skip;
1353                 transform_pow(tctx, inst);
1354                 break;
1355         case TGSI_OPCODE_LIT:
1356                 if (!ctx->config->lower_LIT)
1357                         goto skip;
1358                 transform_lit(tctx, inst);
1359                 break;
1360         case TGSI_OPCODE_EXP:
1361                 if (!ctx->config->lower_EXP)
1362                         goto skip;
1363                 transform_exp(tctx, inst);
1364                 break;
1365         case TGSI_OPCODE_LOG:
1366                 if (!ctx->config->lower_LOG)
1367                         goto skip;
1368                 transform_log(tctx, inst);
1369                 break;
1370         case TGSI_OPCODE_DP4:
1371                 if (!ctx->config->lower_DP4)
1372                         goto skip;
1373                 transform_dotp(tctx, inst);
1374                 break;
1375         case TGSI_OPCODE_DP3:
1376                 if (!ctx->config->lower_DP3)
1377                         goto skip;
1378                 transform_dotp(tctx, inst);
1379                 break;
1380         case TGSI_OPCODE_DPH:
1381                 if (!ctx->config->lower_DPH)
1382                         goto skip;
1383                 transform_dotp(tctx, inst);
1384                 break;
1385         case TGSI_OPCODE_DP2:
1386                 if (!ctx->config->lower_DP2)
1387                         goto skip;
1388                 transform_dotp(tctx, inst);
1389                 break;
1390         case TGSI_OPCODE_DP2A:
1391                 if (!ctx->config->lower_DP2A)
1392                         goto skip;
1393                 transform_dotp(tctx, inst);
1394                 break;
1395         case TGSI_OPCODE_TEX:
1396         case TGSI_OPCODE_TXP:
1397         case TGSI_OPCODE_TXB:
1398         case TGSI_OPCODE_TXB2:
1399         case TGSI_OPCODE_TXL:
1400                 if (transform_samp(tctx, inst))
1401                         goto skip;
1402                 break;
1403         default:
1404         skip:
1405                 tctx->emit_instruction(tctx, inst);
1406                 break;
1407         }
1408 }
1409
1410 /* returns NULL if no lowering required, else returns the new
1411  * tokens (which caller is required to free()).  In either case
1412  * returns the current info.
1413  */
1414 const struct tgsi_token *
1415 fd_transform_lowering(const struct fd_lowering_config *config,
1416                 const struct tgsi_token *tokens,
1417                 struct tgsi_shader_info *info)
1418 {
1419         struct fd_lowering_context ctx;
1420         struct tgsi_token *newtoks;
1421         int newlen, numtmp;
1422
1423         /* sanity check in case limit is ever increased: */
1424         assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1425
1426         memset(&ctx, 0, sizeof(ctx));
1427         ctx.base.transform_instruction = transform_instr;
1428         ctx.info = info;
1429         ctx.config = config;
1430
1431         tgsi_scan_shader(tokens, info);
1432
1433         /* if we are adding fragment shader support to emulate two-sided
1434          * color, then figure out the number of additional inputs we need
1435          * to create for BCOLOR's..
1436          */
1437         if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1438                         config->color_two_side) {
1439                 int i;
1440                 ctx.face_idx = -1;
1441                 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1442                         if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1443                                 ctx.two_side_idx[ctx.two_side_colors++] = i;
1444                         if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1445                                 ctx.face_idx = i;
1446                 }
1447         }
1448
1449         ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1450
1451 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1452         /* if there are no instructions to lower, then we are done: */
1453         if (!(OPCS(DST) ||
1454                         OPCS(XPD) ||
1455                         OPCS(SCS) ||
1456                         OPCS(LRP) ||
1457                         OPCS(FRC) ||
1458                         OPCS(POW) ||
1459                         OPCS(LIT) ||
1460                         OPCS(EXP) ||
1461                         OPCS(LOG) ||
1462                         OPCS(DP4) ||
1463                         OPCS(DP3) ||
1464                         OPCS(DPH) ||
1465                         OPCS(DP2) ||
1466                         OPCS(DP2A) ||
1467                         ctx.two_side_colors ||
1468                         ctx.saturate))
1469                 return NULL;
1470
1471 #if 0  /* debug */
1472         _debug_printf("BEFORE:");
1473         tgsi_dump(tokens, 0);
1474 #endif
1475
1476         numtmp = 0;
1477         newlen = tgsi_num_tokens(tokens);
1478         if (OPCS(DST)) {
1479                 newlen += DST_GROW * OPCS(DST);
1480                 numtmp = MAX2(numtmp, DST_TMP);
1481         }
1482         if (OPCS(XPD)) {
1483                 newlen += XPD_GROW * OPCS(XPD);
1484                 numtmp = MAX2(numtmp, XPD_TMP);
1485         }
1486         if (OPCS(SCS)) {
1487                 newlen += SCS_GROW * OPCS(SCS);
1488                 numtmp = MAX2(numtmp, SCS_TMP);
1489         }
1490         if (OPCS(LRP)) {
1491                 newlen += LRP_GROW * OPCS(LRP);
1492                 numtmp = MAX2(numtmp, LRP_TMP);
1493         }
1494         if (OPCS(FRC)) {
1495                 newlen += FRC_GROW * OPCS(FRC);
1496                 numtmp = MAX2(numtmp, FRC_TMP);
1497         }
1498         if (OPCS(POW)) {
1499                 newlen += POW_GROW * OPCS(POW);
1500                 numtmp = MAX2(numtmp, POW_TMP);
1501         }
1502         if (OPCS(LIT)) {
1503                 newlen += LIT_GROW * OPCS(LIT);
1504                 numtmp = MAX2(numtmp, LIT_TMP);
1505         }
1506         if (OPCS(EXP)) {
1507                 newlen += EXP_GROW * OPCS(EXP);
1508                 numtmp = MAX2(numtmp, EXP_TMP);
1509         }
1510         if (OPCS(LOG)) {
1511                 newlen += LOG_GROW * OPCS(LOG);
1512                 numtmp = MAX2(numtmp, LOG_TMP);
1513         }
1514         if (OPCS(DP4)) {
1515                 newlen += DP4_GROW * OPCS(DP4);
1516                 numtmp = MAX2(numtmp, DOTP_TMP);
1517         }
1518         if (OPCS(DP3)) {
1519                 newlen += DP3_GROW * OPCS(DP3);
1520                 numtmp = MAX2(numtmp, DOTP_TMP);
1521         }
1522         if (OPCS(DPH)) {
1523                 newlen += DPH_GROW * OPCS(DPH);
1524                 numtmp = MAX2(numtmp, DOTP_TMP);
1525         }
1526         if (OPCS(DP2)) {
1527                 newlen += DP2_GROW * OPCS(DP2);
1528                 numtmp = MAX2(numtmp, DOTP_TMP);
1529         }
1530         if (OPCS(DP2A)) {
1531                 newlen += DP2A_GROW * OPCS(DP2A);
1532                 numtmp = MAX2(numtmp, DOTP_TMP);
1533         }
1534         if (ctx.saturate) {
1535                 int n = info->opcode_count[TGSI_OPCODE_TEX] +
1536                         info->opcode_count[TGSI_OPCODE_TXP] +
1537                         info->opcode_count[TGSI_OPCODE_TXB] +
1538                         info->opcode_count[TGSI_OPCODE_TXB2] +
1539                         info->opcode_count[TGSI_OPCODE_TXL];
1540                 newlen += SAMP_GROW * n;
1541                 numtmp = MAX2(numtmp, SAMP_TMP);
1542         }
1543
1544         /* specifically don't include two_side_colors temps in the count: */
1545         ctx.numtmp = numtmp;
1546
1547         if (ctx.two_side_colors) {
1548                 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1549                 /* note: we permanently consume temp regs, re-writing references
1550                  * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1551                  * instruction that selects which varying to use):
1552                  */
1553                 numtmp += ctx.two_side_colors;
1554         }
1555
1556         newlen += 2 * numtmp;
1557         newlen += 5;        /* immediate */
1558
1559         newtoks = tgsi_alloc_tokens(newlen);
1560         if (!newtoks)
1561                 return NULL;
1562
1563         tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1564
1565         tgsi_scan_shader(newtoks, info);
1566
1567 #if 0  /* debug */
1568         _debug_printf("AFTER:");
1569         tgsi_dump(newtoks, 0);
1570 #endif
1571
1572         return newtoks;
1573 }