src/gallium/auxiliary/tgsi/tgsi_lowering.c

   1 /*
   2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "tgsi/tgsi_transform.h"
  28 #include "tgsi/tgsi_scan.h"
  29 #include "tgsi/tgsi_dump.h"
  30
  31 #include "util/u_debug.h"
  32 #include "util/u_math.h"
  33
  34 #include "tgsi_lowering.h"
  35
/*
 * State carried through one lowering pass.  'base' has to stay the first
 * member: tgsi_lowering_context() recovers this struct by casting a
 * struct tgsi_transform_context pointer.
 */
struct tgsi_lowering_context {
   struct tgsi_transform_context base;
   const struct tgsi_lowering_config *config;
   struct tgsi_shader_info *info;
   unsigned two_side_colors;
   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
   int face_idx;
   unsigned numtmp;
   /* scratch registers, each available in src and dst form; the lowering
    * recipes below call them tmpA and tmpB
    */
   struct {
      struct tgsi_full_src_register src;
      struct tgsi_full_dst_register dst;
   } tmp[2];
/* indices into tmp[] */
#define A 0
#define B 1
   /* immediate with the constants used by the lowerings; the code below
    * reads .x as 0.0, .y as 1.0 and .z as 128.0
    */
   struct tgsi_full_src_register imm;
   int emitted_decls;
   unsigned saturate;
};
  55
  56 static inline struct tgsi_lowering_context *
  57 tgsi_lowering_context(struct tgsi_transform_context *tctx)
  58 {
  59    return (struct tgsi_lowering_context *)tctx;
  60 }
  61
  62 /*
  63  * Utility helpers:
  64  */
  65
  66 static void
  67 reg_dst(struct tgsi_full_dst_register *dst,
  68         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
  69 {
  70    *dst = *orig_dst;
  71    dst->Register.WriteMask &= wrmask;
  72    assert(dst->Register.WriteMask);
  73 }
  74
  75 static inline void
  76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
  77 {
  78    swiz[0] = src->SwizzleX;
  79    swiz[1] = src->SwizzleY;
  80    swiz[2] = src->SwizzleZ;
  81    swiz[3] = src->SwizzleW;
  82 }
  83
  84 static void
  85 reg_src(struct tgsi_full_src_register *src,
  86         const struct tgsi_full_src_register *orig_src,
  87         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
  88 {
  89    unsigned swiz[4];
  90    get_swiz(swiz, &orig_src->Register);
  91    *src = *orig_src;
  92    src->Register.SwizzleX = swiz[sx];
  93    src->Register.SwizzleY = swiz[sy];
  94    src->Register.SwizzleZ = swiz[sz];
  95    src->Register.SwizzleW = swiz[sw];
  96 }
  97
/* Swizzle shorthand: SWIZ(x,y,z,w) expands to four comma-separated
 * TGSI_SWIZZLE_* values, which is what reg_src() takes as its trailing
 * arguments.  Use '_' for a channel whose value does not matter; it is
 * mapped to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
 101
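/*
 * Returns true when dst and src name the same register (same file and
 * index) and at least one dst channel in dst_mask is read by src, through
 * its swizzle, for the channels in src_mask.  In that case writing those
 * dst channels would clobber a value src still needs, so the caller must
 * copy src to a temporary first.  Example, from the SCS lowering:
 *
 * if (dst.x aliases src.x) {
 *   MOV tmpA.x, src.x
 *   src = tmpA
 * }
 * COS dst.x, src.x
 * SIN dst.y, src.x
 * MOV dst.zw, imm{0.0, 1.0}
 */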
 111 static bool
 112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
 113         const struct tgsi_full_src_register *src, unsigned src_mask)
 114 {
 115    if ((dst->Register.File == src->Register.File) &&
 116        (dst->Register.Index == src->Register.Index)) {
 117       unsigned i, actual_mask = 0;
 118       unsigned swiz[4];
 119       get_swiz(swiz, &src->Register);
 120       for (i = 0; i < 4; i++)
 121          if (src_mask & (1 << i))
 122             actual_mask |= (1 << swiz[i]);
 123       if (actual_mask & dst_mask)
 124          return true;
 125    }
 126    return false;
 127 }
 128
 129 static void
 130 create_mov(struct tgsi_transform_context *tctx,
 131            const struct tgsi_full_dst_register *dst,
 132            const struct tgsi_full_src_register *src,
 133            unsigned mask, unsigned saturate)
 134 {
 135    struct tgsi_full_instruction new_inst;
 136
 137    new_inst = tgsi_default_full_instruction();
 138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 139    new_inst.Instruction.Saturate = saturate;
 140    new_inst.Instruction.NumDstRegs = 1;
 141    reg_dst(&new_inst.Dst[0], dst, mask);
 142    new_inst.Instruction.NumSrcRegs = 1;
 143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 144    tctx->emit_instruction(tctx, &new_inst);
 145 }
 146
/* Helpers for estimating how many TGSI tokens a lowering adds.  We assume
 * the worst case, i.e. the removed instruction has no ADDR[] or anything
 * else that increases the number of tokens per src/dst, while the inserted
 * instructions do.
 *
 * OINST() - old instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST() - new instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs)  (1 + 1 + 1 * (nargs))
#define NINST(nargs)  (1 + 2 + 2 * (nargs))
 165
 166 /*
 167  * Lowering Translators:
 168  */
 169
 170 /* DST - Distance Vector
 171  *   dst.x = 1.0
 172  *   dst.y = src0.y \times src1.y
 173  *   dst.z = src0.z
 174  *   dst.w = src1.w
 175  *
 176  * ; note: could be more clever and use just a single temp
 177  * ;       if I was clever enough to re-write the swizzles.
 178  * ; needs: 2 tmp, imm{1.0}
 179  * if (dst.y aliases src0.z) {
 180  *   MOV tmpA.yz, src0.yz
 181  *   src0 = tmpA
 182  * }
 183  * if (dst.yz aliases src1.w) {
 184  *   MOV tmpB.yw, src1.yw
 185  *   src1 = tmpB
 186  * }
 187  * MUL dst.y, src0.y, src1.y
 188  * MOV dst.z, src0.z
 189  * MOV dst.w, src1.w
 190  * MOV dst.x, imm{1.0}
 191  */
 192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
 193                 NINST(1) + NINST(1) - OINST(2))
 194 #define DST_TMP  2
 195 static void
 196 transform_dst(struct tgsi_transform_context *tctx,
 197               struct tgsi_full_instruction *inst)
 198 {
 199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 201    struct tgsi_full_src_register *src0 = &inst->Src[0];
 202    struct tgsi_full_src_register *src1 = &inst->Src[1];
 203    struct tgsi_full_instruction new_inst;
 204
 205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
 206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
 207       src0 = &ctx->tmp[A].src;
 208    }
 209
 210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
 211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
 212       src1 = &ctx->tmp[B].src;
 213    }
 214
 215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 216       /* MUL dst.y, src0.y, src1.y */
 217       new_inst = tgsi_default_full_instruction();
 218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 219       new_inst.Instruction.NumDstRegs = 1;
 220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 221       new_inst.Instruction.NumSrcRegs = 2;
 222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
 223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
 224       tctx->emit_instruction(tctx, &new_inst);
 225    }
 226
 227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 228       /* MOV dst.z, src0.z */
 229       new_inst = tgsi_default_full_instruction();
 230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 231       new_inst.Instruction.NumDstRegs = 1;
 232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 233       new_inst.Instruction.NumSrcRegs = 1;
 234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
 235       tctx->emit_instruction(tctx, &new_inst);
 236    }
 237
 238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 239       /* MOV dst.w, src1.w */
 240       new_inst = tgsi_default_full_instruction();
 241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 242       new_inst.Instruction.NumDstRegs = 1;
 243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 244       new_inst.Instruction.NumSrcRegs = 1;
 245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
 246       tctx->emit_instruction(tctx, &new_inst);
 247    }
 248
 249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 250       /* MOV dst.x, imm{1.0} */
 251       new_inst = tgsi_default_full_instruction();
 252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 253       new_inst.Instruction.NumDstRegs = 1;
 254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 255       new_inst.Instruction.NumSrcRegs = 1;
 256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
 257       tctx->emit_instruction(tctx, &new_inst);
 258    }
 259 }
 260
 261 /* XPD - Cross Product
 262  *   dst.x = src0.y \times src1.z - src1.y \times src0.z
 263  *   dst.y = src0.z \times src1.x - src1.z \times src0.x
 264  *   dst.z = src0.x \times src1.y - src1.x \times src0.y
 265  *   dst.w = 1.0
 266  *
 267  * ; needs: 2 tmp, imm{1.0}
 268  * MUL tmpA.xyz, src0.yzx, src1.zxy
 269  * MUL tmpB.xyz, src1.yzx, src0.zxy
 270  * SUB dst.xyz, tmpA.xyz, tmpB.xyz
 271  * MOV dst.w, imm{1.0}
 272  */
 273 #define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
 274 #define XPD_TMP  2
 275 static void
 276 transform_xpd(struct tgsi_transform_context *tctx,
 277               struct tgsi_full_instruction *inst)
 278 {
 279    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 280    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 281    struct tgsi_full_src_register *src0 = &inst->Src[0];
 282    struct tgsi_full_src_register *src1 = &inst->Src[1];
 283    struct tgsi_full_instruction new_inst;
 284
 285    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
 286       /* MUL tmpA.xyz, src0.yzx, src1.zxy */
 287       new_inst = tgsi_default_full_instruction();
 288       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 289       new_inst.Instruction.NumDstRegs = 1;
 290       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
 291       new_inst.Instruction.NumSrcRegs = 2;
 292       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
 293       reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
 294       tctx->emit_instruction(tctx, &new_inst);
 295
 296       /* MUL tmpB.xyz, src1.yzx, src0.zxy */
 297       new_inst = tgsi_default_full_instruction();
 298       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 299       new_inst.Instruction.NumDstRegs = 1;
 300       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
 301       new_inst.Instruction.NumSrcRegs = 2;
 302       reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
 303       reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
 304       tctx->emit_instruction(tctx, &new_inst);
 305
 306       /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
 307       new_inst = tgsi_default_full_instruction();
 308       new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 309       new_inst.Instruction.NumDstRegs = 1;
 310       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
 311       new_inst.Instruction.NumSrcRegs = 2;
 312       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
 313       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _));
 314       tctx->emit_instruction(tctx, &new_inst);
 315    }
 316
 317    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 318       /* MOV dst.w, imm{1.0} */
 319       new_inst = tgsi_default_full_instruction();
 320       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 321       new_inst.Instruction.NumDstRegs = 1;
 322       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 323       new_inst.Instruction.NumSrcRegs = 1;
 324       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
 325       tctx->emit_instruction(tctx, &new_inst);
 326    }
 327 }
 328
 329 /* SCS - Sine Cosine
 330  *   dst.x = \cos{src.x}
 331  *   dst.y = \sin{src.x}
 332  *   dst.z = 0.0
 333  *   dst.w = 1.0
 334  *
 335  * ; needs: 1 tmp, imm{0.0, 1.0}
 336  * if (dst.x aliases src.x) {
 337  *   MOV tmpA.x, src.x
 338  *   src = tmpA
 339  * }
 340  * COS dst.x, src.x
 341  * SIN dst.y, src.x
 342  * MOV dst.zw, imm{0.0, 1.0}
 343  */
 344 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
 345 #define SCS_TMP  1
 346 static void
 347 transform_scs(struct tgsi_transform_context *tctx,
 348               struct tgsi_full_instruction *inst)
 349 {
 350    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 351    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 352    struct tgsi_full_src_register *src = &inst->Src[0];
 353    struct tgsi_full_instruction new_inst;
 354
 355    if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
 356       create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
 357       src = &ctx->tmp[A].src;
 358    }
 359
 360    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 361       /* COS dst.x, src.x */
 362       new_inst = tgsi_default_full_instruction();
 363       new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
 364       new_inst.Instruction.NumDstRegs = 1;
 365       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 366       new_inst.Instruction.NumSrcRegs = 1;
 367       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 368       tctx->emit_instruction(tctx, &new_inst);
 369    }
 370
 371    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 372       /* SIN dst.y, src.x */
 373       new_inst = tgsi_default_full_instruction();
 374       new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
 375       new_inst.Instruction.NumDstRegs = 1;
 376       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 377       new_inst.Instruction.NumSrcRegs = 1;
 378       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 379       tctx->emit_instruction(tctx, &new_inst);
 380    }
 381
 382    if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
 383       /* MOV dst.zw, imm{0.0, 1.0} */
 384       new_inst = tgsi_default_full_instruction();
 385       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 386       new_inst.Instruction.NumDstRegs = 1;
 387       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
 388       new_inst.Instruction.NumSrcRegs = 1;
 389       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
 390       tctx->emit_instruction(tctx, &new_inst);
 391    }
 392 }
 393
 394 /* LRP - Linear Interpolate
 395  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
 396  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
 397  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
 398  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
 399  *
 400  * ; needs: 2 tmp, imm{1.0}
 401  * MUL tmpA, src0, src1
 402  * SUB tmpB, imm{1.0}, src0
 403  * MUL tmpB, tmpB, src2
 404  * ADD dst, tmpA, tmpB
 405  */
 406 #define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
 407 #define LRP_TMP  2
 408 static void
 409 transform_lrp(struct tgsi_transform_context *tctx,
 410               struct tgsi_full_instruction *inst)
 411 {
 412    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 413    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 414    struct tgsi_full_src_register *src0 = &inst->Src[0];
 415    struct tgsi_full_src_register *src1 = &inst->Src[1];
 416    struct tgsi_full_src_register *src2 = &inst->Src[2];
 417    struct tgsi_full_instruction new_inst;
 418
 419    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 420       /* MUL tmpA, src0, src1 */
 421       new_inst = tgsi_default_full_instruction();
 422       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 423       new_inst.Instruction.NumDstRegs = 1;
 424       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 425       new_inst.Instruction.NumSrcRegs = 2;
 426       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 427       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
 428       tctx->emit_instruction(tctx, &new_inst);
 429
 430       /* SUB tmpB, imm{1.0}, src0 */
 431       new_inst = tgsi_default_full_instruction();
 432       new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 433       new_inst.Instruction.NumDstRegs = 1;
 434       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
 435       new_inst.Instruction.NumSrcRegs = 2;
 436       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
 437       reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
 438       tctx->emit_instruction(tctx, &new_inst);
 439
 440       /* MUL tmpB, tmpB, src2 */
 441       new_inst = tgsi_default_full_instruction();
 442       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 443       new_inst.Instruction.NumDstRegs = 1;
 444       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
 445       new_inst.Instruction.NumSrcRegs = 2;
 446       reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
 447       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
 448       tctx->emit_instruction(tctx, &new_inst);
 449
 450       /* ADD dst, tmpA, tmpB */
 451       new_inst = tgsi_default_full_instruction();
 452       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 453       new_inst.Instruction.NumDstRegs = 1;
 454       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 455       new_inst.Instruction.NumSrcRegs = 2;
 456       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 457       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
 458       tctx->emit_instruction(tctx, &new_inst);
 459    }
 460 }
 461
 462 /* FRC - Fraction
 463  *  dst.x = src.x - \lfloor src.x\rfloor
 464  *  dst.y = src.y - \lfloor src.y\rfloor
 465  *  dst.z = src.z - \lfloor src.z\rfloor
 466  *  dst.w = src.w - \lfloor src.w\rfloor
 467  *
 468  * ; needs: 1 tmp
 469  * FLR tmpA, src
 470  * SUB dst, src, tmpA
 471  */
 472 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
 473 #define FRC_TMP  1
 474 static void
 475 transform_frc(struct tgsi_transform_context *tctx,
 476               struct tgsi_full_instruction *inst)
 477 {
 478    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 479    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 480    struct tgsi_full_src_register *src = &inst->Src[0];
 481    struct tgsi_full_instruction new_inst;
 482
 483    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 484       /* FLR tmpA, src */
 485       new_inst = tgsi_default_full_instruction();
 486       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 487       new_inst.Instruction.NumDstRegs = 1;
 488       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 489       new_inst.Instruction.NumSrcRegs = 1;
 490       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 491       tctx->emit_instruction(tctx, &new_inst);
 492
 493       /* SUB dst, src, tmpA */
 494       new_inst = tgsi_default_full_instruction();
 495       new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 496       new_inst.Instruction.NumDstRegs = 1;
 497       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 498       new_inst.Instruction.NumSrcRegs = 2;
 499       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 500       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 501       tctx->emit_instruction(tctx, &new_inst);
 502    }
 503 }
 504
 505 /* POW - Power
 506  *  dst.x = src0.x^{src1.x}
 507  *  dst.y = src0.x^{src1.x}
 508  *  dst.z = src0.x^{src1.x}
 509  *  dst.w = src0.x^{src1.x}
 510  *
 511  * ; needs: 1 tmp
 512  * LG2 tmpA.x, src0.x
 513  * MUL tmpA.x, src1.x, tmpA.x
 514  * EX2 dst, tmpA.x
 515  */
 516 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
 517 #define POW_TMP  1
 518 static void
 519 transform_pow(struct tgsi_transform_context *tctx,
 520               struct tgsi_full_instruction *inst)
 521 {
 522    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 523    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 524    struct tgsi_full_src_register *src0 = &inst->Src[0];
 525    struct tgsi_full_src_register *src1 = &inst->Src[1];
 526    struct tgsi_full_instruction new_inst;
 527
 528    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 529       /* LG2 tmpA.x, src0.x */
 530       new_inst = tgsi_default_full_instruction();
 531       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 532       new_inst.Instruction.NumDstRegs = 1;
 533       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 534       new_inst.Instruction.NumSrcRegs = 1;
 535       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
 536       tctx->emit_instruction(tctx, &new_inst);
 537
 538       /* MUL tmpA.x, src1.x, tmpA.x */
 539       new_inst = tgsi_default_full_instruction();
 540       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 541       new_inst.Instruction.NumDstRegs = 1;
 542       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 543       new_inst.Instruction.NumSrcRegs = 2;
 544       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
 545       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 546       tctx->emit_instruction(tctx, &new_inst);
 547
 548       /* EX2 dst, tmpA.x */
 549       new_inst = tgsi_default_full_instruction();
 550       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 551       new_inst.Instruction.NumDstRegs = 1;
 552       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 553       new_inst.Instruction.NumSrcRegs = 1;
 554       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 555       tctx->emit_instruction(tctx, &new_inst);
 556    }
 557 }
 558
 559 /* LIT - Light Coefficients
 560  *  dst.x = 1.0
 561  *  dst.y = max(src.x, 0.0)
 562  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 563  *  dst.w = 1.0
 564  *
 565  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
 566  * MAX tmpA.xy, src.xy, imm{0.0}
 567  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
 568  * LG2 tmpA.y, tmpA.y
 569  * MUL tmpA.y, tmpA.z, tmpA.y
 570  * EX2 tmpA.y, tmpA.y
 571  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
 572  * MOV dst.yz, tmpA.xy
 573  * MOV dst.xw, imm{1.0}
 574  */
 575 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
 576                 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
 577 #define LIT_TMP  1
 578 static void
 579 transform_lit(struct tgsi_transform_context *tctx,
 580               struct tgsi_full_instruction *inst)
 581 {
 582    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 583    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 584    struct tgsi_full_src_register *src = &inst->Src[0];
 585    struct tgsi_full_instruction new_inst;
 586
 587    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
 588       /* MAX tmpA.xy, src.xy, imm{0.0} */
 589       new_inst = tgsi_default_full_instruction();
 590       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
 591       new_inst.Instruction.NumDstRegs = 1;
 592       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
 593       new_inst.Instruction.NumSrcRegs = 2;
 594       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
 595       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
 596       tctx->emit_instruction(tctx, &new_inst);
 597
 598       /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
 599       new_inst = tgsi_default_full_instruction();
 600       new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
 601       new_inst.Instruction.NumDstRegs = 1;
 602       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 603       new_inst.Instruction.NumSrcRegs = 3;
 604       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
 605       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
 606       new_inst.Src[1].Register.Negate = true;
 607       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
 608       tctx->emit_instruction(tctx, &new_inst);
 609
 610       /* LG2 tmpA.y, tmpA.y */
 611       new_inst = tgsi_default_full_instruction();
 612       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 613       new_inst.Instruction.NumDstRegs = 1;
 614       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 615       new_inst.Instruction.NumSrcRegs = 1;
 616       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 617       tctx->emit_instruction(tctx, &new_inst);
 618
 619       /* MUL tmpA.y, tmpA.z, tmpA.y */
 620       new_inst = tgsi_default_full_instruction();
 621       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 622       new_inst.Instruction.NumDstRegs = 1;
 623       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 624       new_inst.Instruction.NumSrcRegs = 2;
 625       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
 626       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
 627       tctx->emit_instruction(tctx, &new_inst);
 628
 629       /* EX2 tmpA.y, tmpA.y */
 630       new_inst = tgsi_default_full_instruction();
 631       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 632       new_inst.Instruction.NumDstRegs = 1;
 633       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 634       new_inst.Instruction.NumSrcRegs = 1;
 635       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 636       tctx->emit_instruction(tctx, &new_inst);
 637
 638       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
 639       new_inst = tgsi_default_full_instruction();
 640       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
 641       new_inst.Instruction.NumDstRegs = 1;
 642       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 643       new_inst.Instruction.NumSrcRegs = 3;
 644       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 645       new_inst.Src[0].Register.Negate = true;
 646       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
 647       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
 648       tctx->emit_instruction(tctx, &new_inst);
 649
 650       /* MOV dst.yz, tmpA.xy */
 651       new_inst = tgsi_default_full_instruction();
 652       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 653       new_inst.Instruction.NumDstRegs = 1;
 654       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
 655       new_inst.Instruction.NumSrcRegs = 1;
 656       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
 657       tctx->emit_instruction(tctx, &new_inst);
 658    }
 659
 660    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
 661       /* MOV dst.xw, imm{1.0} */
 662       new_inst = tgsi_default_full_instruction();
 663       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 664       new_inst.Instruction.NumDstRegs = 1;
 665       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
 666       new_inst.Instruction.NumSrcRegs = 1;
 667       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
 668       tctx->emit_instruction(tctx, &new_inst);
 669    }
 670 }
 671
 672 /* EXP - Approximate Exponential Base 2
 673  *  dst.x = 2^{\lfloor src.x\rfloor}
 674  *  dst.y = src.x - \lfloor src.x\rfloor
 675  *  dst.z = 2^{src.x}
 676  *  dst.w = 1.0
 677  *
 678  * ; needs: 1 tmp, imm{1.0}
 679  * FLR tmpA.x, src.x
 680  * EX2 tmpA.y, src.x
 681  * SUB dst.y, src.x, tmpA.x
 682  * EX2 dst.x, tmpA.x
 683  * MOV dst.z, tmpA.y
 684  * MOV dst.w, imm{1.0}
 685  */
 686 #define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
 687                 NINST(1)+ NINST(1) - OINST(1))
 688 #define EXP_TMP  1
 689 static void
 690 transform_exp(struct tgsi_transform_context *tctx,
 691               struct tgsi_full_instruction *inst)
 692 {
 693    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 694    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 695    struct tgsi_full_src_register *src = &inst->Src[0];
 696    struct tgsi_full_instruction new_inst;
 697
 698    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 699       /* FLR tmpA.x, src.x */
 700       new_inst = tgsi_default_full_instruction();
 701       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 702       new_inst.Instruction.NumDstRegs = 1;
 703       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 704       new_inst.Instruction.NumSrcRegs = 1;
 705       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 706       tctx->emit_instruction(tctx, &new_inst);
 707    }
 708
 709    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 710       /* EX2 tmpA.y, src.x */
 711       new_inst = tgsi_default_full_instruction();
 712       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 713       new_inst.Instruction.NumDstRegs = 1;
 714       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 715       new_inst.Instruction.NumSrcRegs = 1;
 716       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 717       tctx->emit_instruction(tctx, &new_inst);
 718    }
 719
 720    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 721       /* SUB dst.y, src.x, tmpA.x */
 722       new_inst = tgsi_default_full_instruction();
 723       new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
 724       new_inst.Instruction.NumDstRegs = 1;
 725       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 726       new_inst.Instruction.NumSrcRegs = 2;
 727       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 728       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 729       tctx->emit_instruction(tctx, &new_inst);
 730    }
 731
 732    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 733       /* EX2 dst.x, tmpA.x */
 734       new_inst = tgsi_default_full_instruction();
 735       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 736       new_inst.Instruction.NumDstRegs = 1;
 737       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 738       new_inst.Instruction.NumSrcRegs = 1;
 739       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 740       tctx->emit_instruction(tctx, &new_inst);
 741    }
 742
 743    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 744       /* MOV dst.z, tmpA.y */
 745       new_inst = tgsi_default_full_instruction();
 746       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 747       new_inst.Instruction.NumDstRegs = 1;
 748       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 749       new_inst.Instruction.NumSrcRegs = 1;
 750       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
 751       tctx->emit_instruction(tctx, &new_inst);
 752    }
 753
 754    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 755       /* MOV dst.w, imm{1.0} */
 756       new_inst = tgsi_default_full_instruction();
 757       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 758       new_inst.Instruction.NumDstRegs = 1;
 759       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 760       new_inst.Instruction.NumSrcRegs = 1;
 761       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
 762       tctx->emit_instruction(tctx, &new_inst);
 763    }
 764 }
 765
 766 /* LOG - Approximate Logarithm Base 2
 767  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 768  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 769  *  dst.z = \log_2{|src.x|}
 770  *  dst.w = 1.0
 771  *
 772  * ; needs: 1 tmp, imm{1.0}
 773  * LG2 tmpA.x, |src.x|
 774  * FLR tmpA.y, tmpA.x
 775  * EX2 tmpA.z, tmpA.y
 776  * RCP tmpA.z, tmpA.z
 777  * MUL dst.y, |src.x|, tmpA.z
 778  * MOV dst.xz, tmpA.yx
 779  * MOV dst.w, imm{1.0}
 780  */
 781 #define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
 782                 NINST(2) + NINST(1) + NINST(1) - OINST(1))
 783 #define LOG_TMP  1
 784 static void
 785 transform_log(struct tgsi_transform_context *tctx,
 786               struct tgsi_full_instruction *inst)
 787 {
 788    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 789    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 790    struct tgsi_full_src_register *src = &inst->Src[0];
 791    struct tgsi_full_instruction new_inst;
 792
 793    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
 794       /* LG2 tmpA.x, |src.x| */
 795       new_inst = tgsi_default_full_instruction();
 796       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 797       new_inst.Instruction.NumDstRegs = 1;
 798       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 799       new_inst.Instruction.NumSrcRegs = 1;
 800       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 801       new_inst.Src[0].Register.Absolute = true;
 802       tctx->emit_instruction(tctx, &new_inst);
 803    }
 804
 805    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 806       /* FLR tmpA.y, tmpA.x */
 807       new_inst = tgsi_default_full_instruction();
 808       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 809       new_inst.Instruction.NumDstRegs = 1;
 810       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 811       new_inst.Instruction.NumSrcRegs = 1;
 812       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 813       tctx->emit_instruction(tctx, &new_inst);
 814    }
 815
 816    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 817       /* EX2 tmpA.z, tmpA.y */
 818       new_inst = tgsi_default_full_instruction();
 819       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 820       new_inst.Instruction.NumDstRegs = 1;
 821       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 822       new_inst.Instruction.NumSrcRegs = 1;
 823       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 824       tctx->emit_instruction(tctx, &new_inst);
 825
 826       /* RCP tmpA.z, tmpA.z */
 827       new_inst = tgsi_default_full_instruction();
 828       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
 829       new_inst.Instruction.NumDstRegs = 1;
 830       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 831       new_inst.Instruction.NumSrcRegs = 1;
 832       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
 833       tctx->emit_instruction(tctx, &new_inst);
 834
 835       /* MUL dst.y, |src.x|, tmpA.z */
 836       new_inst = tgsi_default_full_instruction();
 837       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 838       new_inst.Instruction.NumDstRegs = 1;
 839       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 840       new_inst.Instruction.NumSrcRegs = 2;
 841       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 842       new_inst.Src[0].Register.Absolute = true;
 843       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
 844       tctx->emit_instruction(tctx, &new_inst);
 845    }
 846
 847    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
 848       /* MOV dst.xz, tmpA.yx */
 849       new_inst = tgsi_default_full_instruction();
 850       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 851       new_inst.Instruction.NumDstRegs = 1;
 852       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
 853       new_inst.Instruction.NumSrcRegs = 1;
 854       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
 855       tctx->emit_instruction(tctx, &new_inst);
 856    }
 857
 858    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 859       /* MOV dst.w, imm{1.0} */
 860       new_inst = tgsi_default_full_instruction();
 861       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 862       new_inst.Instruction.NumDstRegs = 1;
 863       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 864       new_inst.Instruction.NumSrcRegs = 1;
 865       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
 866       tctx->emit_instruction(tctx, &new_inst);
 867    }
 868 }
 869
 870 /* DP4 - 4-component Dot Product
 871  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 872  *
 873  * DP3 - 3-component Dot Product
 874  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 875  *
 876  * DPH - Homogeneous Dot Product
 877  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 878  *
 879  * DP2 - 2-component Dot Product
 880  *   dst = src0.x \times src1.x + src0.y \times src1.y
 881  *
 882  * DP2A - 2-component Dot Product And Add
 883  *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
 884  *
 885  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
 886  * operations, which is what you'd prefer for a ISA that is natively
 887  * scalar.  Probably a native vector ISA would at least already have
 888  * DP4/DP3 instructions, but perhaps there is room for an alternative
 889  * translation for DPH/DP2/DP2A using vector instructions.
 890  *
 891  * ; needs: 1 tmp
 892  * MUL tmpA.x, src0.x, src1.x
 893  * MAD tmpA.x, src0.y, src1.y, tmpA.x
 894  * if (DPH || DP3 || DP4) {
 895  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
 896  *   if (DPH) {
 897  *     ADD tmpA.x, src1.w, tmpA.x
 898  *   } else if (DP4) {
 899  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
 900  *   }
 901  * } else if (DP2A) {
 902  *   ADD tmpA.x, src2.x, tmpA.x
 903  * }
 904  * ; fixup last instruction to replicate into dst
 905  */
 906 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
 907 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
 908 #define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
 909 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
 910 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
 911 #define DOTP_TMP  1
 912 static void
 913 transform_dotp(struct tgsi_transform_context *tctx,
 914                struct tgsi_full_instruction *inst)
 915 {
 916    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 917    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 918    struct tgsi_full_src_register *src0 = &inst->Src[0];
 919    struct tgsi_full_src_register *src1 = &inst->Src[1];
 920    struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
 921    struct tgsi_full_instruction new_inst;
 922    unsigned opcode = inst->Instruction.Opcode;
 923
 924    /* NOTE: any potential last instruction must replicate src on all
 925     * components (since it could be re-written to write to final dst)
 926     */
 927
 928    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 929       /* MUL tmpA.x, src0.x, src1.x */
 930       new_inst = tgsi_default_full_instruction();
 931       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 932       new_inst.Instruction.NumDstRegs = 1;
 933       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 934       new_inst.Instruction.NumSrcRegs = 2;
 935       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
 936       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
 937       tctx->emit_instruction(tctx, &new_inst);
 938
 939       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
 940       new_inst = tgsi_default_full_instruction();
 941       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 942       new_inst.Instruction.NumDstRegs = 1;
 943       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 944       new_inst.Instruction.NumSrcRegs = 3;
 945       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
 946       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
 947       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 948
 949       if ((opcode == TGSI_OPCODE_DPH) ||
 950           (opcode == TGSI_OPCODE_DP3) ||
 951           (opcode == TGSI_OPCODE_DP4)) {
 952          tctx->emit_instruction(tctx, &new_inst);
 953
 954          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
 955          new_inst = tgsi_default_full_instruction();
 956          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 957          new_inst.Instruction.NumDstRegs = 1;
 958          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 959          new_inst.Instruction.NumSrcRegs = 3;
 960          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
 961          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
 962          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 963
 964          if (opcode == TGSI_OPCODE_DPH) {
 965             tctx->emit_instruction(tctx, &new_inst);
 966
 967             /* ADD tmpA.x, src1.w, tmpA.x */
 968             new_inst = tgsi_default_full_instruction();
 969             new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 970             new_inst.Instruction.NumDstRegs = 1;
 971             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 972             new_inst.Instruction.NumSrcRegs = 2;
 973             reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
 974             reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 975          } else if (opcode == TGSI_OPCODE_DP4) {
 976             tctx->emit_instruction(tctx, &new_inst);
 977
 978             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
 979             new_inst = tgsi_default_full_instruction();
 980             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 981             new_inst.Instruction.NumDstRegs = 1;
 982             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 983             new_inst.Instruction.NumSrcRegs = 3;
 984             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
 985             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
 986             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 987          }
 988       } else if (opcode == TGSI_OPCODE_DP2A) {
 989          tctx->emit_instruction(tctx, &new_inst);
 990
 991          /* ADD tmpA.x, src2.x, tmpA.x */
 992          new_inst = tgsi_default_full_instruction();
 993          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 994          new_inst.Instruction.NumDstRegs = 1;
 995          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 996          new_inst.Instruction.NumSrcRegs = 2;
 997          reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
 998          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 999       }
1000
1001       /* fixup last instruction to write to dst: */
1002       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1003
1004       tctx->emit_instruction(tctx, &new_inst);
1005    }
1006 }
1007
1008 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1009  * in the case of TXP, the clamping must happen *after* projection, so
1010  * we need to lower TXP to TEX.
1011  *
1012  *   MOV tmpA, src0
1013  *   if (opc == TXP) {
1014  *     ; do perspective division manually before clamping:
1015  *     RCP tmpB, tmpA.w
1016  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1017  *     opc = TEX;
1018  *   }
1019  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1020  *   <opc> dst, tmpA, ...
1021  */
1022 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1023 #define SAMP_TMP  2
1024 static int
1025 transform_samp(struct tgsi_transform_context *tctx,
1026                struct tgsi_full_instruction *inst)
1027 {
1028    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1029    struct tgsi_full_src_register *coord = &inst->Src[0];
1030    struct tgsi_full_src_register *samp;
1031    struct tgsi_full_instruction new_inst;
1032    /* mask is clamped coords, pmask is all coords (for projection): */
1033    unsigned mask = 0, pmask = 0, smask;
1034    unsigned tex = inst->Texture.Texture;
1035    unsigned opcode = inst->Instruction.Opcode;
1036    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1037                    (ctx->config->lower_TXP & (1 << tex));
1038
1039    if (opcode == TGSI_OPCODE_TXB2) {
1040       samp = &inst->Src[2];
1041    } else {
1042       samp = &inst->Src[1];
1043    }
1044
1045    /* convert sampler # to bitmask to test: */
1046    smask = 1 << samp->Register.Index;
1047
1048    /* check if we actually need to lower this one: */
1049    if (!(ctx->saturate & smask) && !lower_txp)
1050       return -1;
1051
1052    /* figure out which coordinates need saturating:
1053     *   - RECT textures should not get saturated
1054     *   - array index coords should not get saturated
1055     */
1056    switch (tex) {
1057    case TGSI_TEXTURE_3D:
1058    case TGSI_TEXTURE_CUBE:
1059    case TGSI_TEXTURE_CUBE_ARRAY:
1060    case TGSI_TEXTURE_SHADOWCUBE:
1061    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1062       if (ctx->config->saturate_r & smask)
1063          mask |= TGSI_WRITEMASK_Z;
1064       pmask |= TGSI_WRITEMASK_Z;
1065       /* fallthrough */
1066
1067    case TGSI_TEXTURE_2D:
1068    case TGSI_TEXTURE_2D_ARRAY:
1069    case TGSI_TEXTURE_SHADOW2D:
1070    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1071    case TGSI_TEXTURE_2D_MSAA:
1072    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1073       if (ctx->config->saturate_t & smask)
1074          mask |= TGSI_WRITEMASK_Y;
1075       pmask |= TGSI_WRITEMASK_Y;
1076       /* fallthrough */
1077
1078    case TGSI_TEXTURE_1D:
1079    case TGSI_TEXTURE_1D_ARRAY:
1080    case TGSI_TEXTURE_SHADOW1D:
1081    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1082       if (ctx->config->saturate_s & smask)
1083          mask |= TGSI_WRITEMASK_X;
1084       pmask |= TGSI_WRITEMASK_X;
1085       break;
1086
1087    case TGSI_TEXTURE_RECT:
1088    case TGSI_TEXTURE_SHADOWRECT:
1089       /* we don't saturate, but in case of lower_txp we
1090        * still need to do the perspective divide:
1091        */
1092        pmask = TGSI_WRITEMASK_XY;
1093        break;
1094    }
1095
1096    /* sanity check.. driver could be asking to saturate a non-
1097     * existent coordinate component:
1098     */
1099    if (!mask && !lower_txp)
1100       return -1;
1101
1102    /* MOV tmpA, src0 */
1103    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1104
1105    /* This is a bit sad.. we need to clamp *after* the coords
1106     * are projected, which means lowering TXP to TEX and doing
1107     * the projection ourself.  But since I haven't figured out
1108     * how to make the lowering code deliver an electric shock
1109     * to anyone using GL_CLAMP, we must do this instead:
1110     */
1111    if (opcode == TGSI_OPCODE_TXP) {
1112       /* RCP tmpB.x tmpA.w */
1113       new_inst = tgsi_default_full_instruction();
1114       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1115       new_inst.Instruction.NumDstRegs = 1;
1116       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1117       new_inst.Instruction.NumSrcRegs = 1;
1118       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1119       tctx->emit_instruction(tctx, &new_inst);
1120
1121       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1122       new_inst = tgsi_default_full_instruction();
1123       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1124       new_inst.Instruction.NumDstRegs = 1;
1125       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1126       new_inst.Instruction.NumSrcRegs = 2;
1127       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1128       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1129       tctx->emit_instruction(tctx, &new_inst);
1130
1131       opcode = TGSI_OPCODE_TEX;
1132    }
1133
1134    /* MOV_SAT tmpA.<mask>, tmpA */
1135    if (mask) {
1136       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
1137                  TGSI_SAT_ZERO_ONE);
1138    }
1139
1140    /* modify the texture samp instruction to take fixed up coord: */
1141    new_inst = *inst;
1142    new_inst.Instruction.Opcode = opcode;
1143    new_inst.Src[0] = ctx->tmp[A].src;
1144    tctx->emit_instruction(tctx, &new_inst);
1145
1146    return 0;
1147 }
1148
1149 /* Two-sided color emulation:
1150  * For each COLOR input, create a corresponding BCOLOR input, plus
1151  * CMP instruction to select front or back color based on FACE
1152  */
1153 #define TWOSIDE_GROW(n)  (                      \
1154       2 +         /* FACE */                    \
1155       ((n) * 2) + /* IN[] BCOLOR[n] */          \
1156       ((n) * 1) + /* TEMP[] */                  \
1157       ((n) * NINST(3))   /* CMP instr */        \
1158       )
1159
1160 static void
1161 emit_twoside(struct tgsi_transform_context *tctx)
1162 {
1163    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1164    struct tgsi_shader_info *info = ctx->info;
1165    struct tgsi_full_declaration decl;
1166    struct tgsi_full_instruction new_inst;
1167    unsigned inbase, tmpbase;
1168    int i;
1169
1170    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1171    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1172
1173    /* additional inputs for BCOLOR's */
1174    for (i = 0; i < ctx->two_side_colors; i++) {
1175       decl = tgsi_default_full_declaration();
1176       decl.Declaration.File = TGSI_FILE_INPUT;
1177       decl.Declaration.Semantic = true;
1178       decl.Range.First = decl.Range.Last = inbase + i;
1179       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1180       decl.Semantic.Index =
1181          info->input_semantic_index[ctx->two_side_idx[i]];
1182       tctx->emit_declaration(tctx, &decl);
1183    }
1184
1185    /* additional input for FACE */
1186    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1187       decl = tgsi_default_full_declaration();
1188       decl.Declaration.File = TGSI_FILE_INPUT;
1189       decl.Declaration.Semantic = true;
1190       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1191       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1192       decl.Semantic.Index = 0;
1193       tctx->emit_declaration(tctx, &decl);
1194
1195       ctx->face_idx = decl.Range.First;
1196    }
1197
1198    /* additional temps for COLOR/BCOLOR selection: */
1199    for (i = 0; i < ctx->two_side_colors; i++) {
1200       decl = tgsi_default_full_declaration();
1201       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1202       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1203       tctx->emit_declaration(tctx, &decl);
1204    }
1205
1206    /* and finally additional instructions to select COLOR/BCOLOR: */
1207    for (i = 0; i < ctx->two_side_colors; i++) {
1208       new_inst = tgsi_default_full_instruction();
1209       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1210
1211       new_inst.Instruction.NumDstRegs = 1;
1212       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1213       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1214       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1215
1216       new_inst.Instruction.NumSrcRegs = 3;
1217       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1218       new_inst.Src[0].Register.Index = ctx->face_idx;
1219       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1220       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1221       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1222       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1223       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1224       new_inst.Src[1].Register.Index = inbase + i;
1225       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1226       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1227       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1228       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1229       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1230       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1231       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1232       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1233       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1234       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1235
1236       tctx->emit_instruction(tctx, &new_inst);
1237    }
1238 }
1239
1240 static void
1241 emit_decls(struct tgsi_transform_context *tctx)
1242 {
1243    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1244    struct tgsi_shader_info *info = ctx->info;
1245    struct tgsi_full_declaration decl;
1246    struct tgsi_full_immediate immed;
1247    unsigned tmpbase;
1248    int i;
1249
1250    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1251
1252    ctx->color_base = tmpbase + ctx->numtmp;
1253
1254    /* declare immediate: */
1255    immed = tgsi_default_full_immediate();
1256    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1257    immed.u[0].Float = 0.0;
1258    immed.u[1].Float = 1.0;
1259    immed.u[2].Float = 128.0;
1260    immed.u[3].Float = 0.0;
1261    tctx->emit_immediate(tctx, &immed);
1262
1263    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1264    ctx->imm.Register.Index = info->immediate_count;
1265    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1266    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1267    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1268    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1269
1270    /* declare temp regs: */
1271    for (i = 0; i < ctx->numtmp; i++) {
1272       decl = tgsi_default_full_declaration();
1273       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1274       decl.Range.First = decl.Range.Last = tmpbase + i;
1275       tctx->emit_declaration(tctx, &decl);
1276
1277       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1278       ctx->tmp[i].src.Register.Index = tmpbase + i;
1279       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1280       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1281       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1282       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1283
1284       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1285       ctx->tmp[i].dst.Register.Index = tmpbase + i;
1286       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1287    }
1288
1289    if (ctx->two_side_colors)
1290       emit_twoside(tctx);
1291 }
1292
1293 static void
1294 rename_color_inputs(struct tgsi_lowering_context *ctx,
1295                     struct tgsi_full_instruction *inst)
1296 {
1297    unsigned i, j;
1298    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1299       struct tgsi_src_register *src = &inst->Src[i].Register;
1300       if (src->File == TGSI_FILE_INPUT) {
1301          for (j = 0; j < ctx->two_side_colors; j++) {
1302             if (src->Index == ctx->two_side_idx[j]) {
1303                src->File = TGSI_FILE_TEMPORARY;
1304                src->Index = ctx->color_base + j;
1305                break;
1306             }
1307          }
1308       }
1309    }
1310
1311 }
1312
1313 static void
1314 transform_instr(struct tgsi_transform_context *tctx,
1315                 struct tgsi_full_instruction *inst)
1316 {
1317    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1318
1319    if (!ctx->emitted_decls) {
1320       emit_decls(tctx);
1321       ctx->emitted_decls = 1;
1322    }
1323
1324    /* if emulating two-sided-color, we need to re-write some
1325     * src registers:
1326     */
1327    if (ctx->two_side_colors)
1328       rename_color_inputs(ctx, inst);
1329
1330    switch (inst->Instruction.Opcode) {
1331    case TGSI_OPCODE_DST:
1332       if (!ctx->config->lower_DST)
1333          goto skip;
1334       transform_dst(tctx, inst);
1335       break;
1336    case TGSI_OPCODE_XPD:
1337       if (!ctx->config->lower_XPD)
1338          goto skip;
1339       transform_xpd(tctx, inst);
1340       break;
1341    case TGSI_OPCODE_SCS:
1342       if (!ctx->config->lower_SCS)
1343          goto skip;
1344       transform_scs(tctx, inst);
1345       break;
1346    case TGSI_OPCODE_LRP:
1347       if (!ctx->config->lower_LRP)
1348          goto skip;
1349       transform_lrp(tctx, inst);
1350       break;
1351    case TGSI_OPCODE_FRC:
1352       if (!ctx->config->lower_FRC)
1353          goto skip;
1354       transform_frc(tctx, inst);
1355       break;
1356    case TGSI_OPCODE_POW:
1357       if (!ctx->config->lower_POW)
1358          goto skip;
1359       transform_pow(tctx, inst);
1360       break;
1361    case TGSI_OPCODE_LIT:
1362       if (!ctx->config->lower_LIT)
1363          goto skip;
1364       transform_lit(tctx, inst);
1365       break;
1366    case TGSI_OPCODE_EXP:
1367       if (!ctx->config->lower_EXP)
1368          goto skip;
1369       transform_exp(tctx, inst);
1370       break;
1371    case TGSI_OPCODE_LOG:
1372       if (!ctx->config->lower_LOG)
1373          goto skip;
1374       transform_log(tctx, inst);
1375       break;
1376    case TGSI_OPCODE_DP4:
1377       if (!ctx->config->lower_DP4)
1378          goto skip;
1379       transform_dotp(tctx, inst);
1380       break;
1381    case TGSI_OPCODE_DP3:
1382       if (!ctx->config->lower_DP3)
1383          goto skip;
1384       transform_dotp(tctx, inst);
1385       break;
1386    case TGSI_OPCODE_DPH:
1387       if (!ctx->config->lower_DPH)
1388          goto skip;
1389       transform_dotp(tctx, inst);
1390       break;
1391    case TGSI_OPCODE_DP2:
1392       if (!ctx->config->lower_DP2)
1393          goto skip;
1394       transform_dotp(tctx, inst);
1395       break;
1396    case TGSI_OPCODE_DP2A:
1397       if (!ctx->config->lower_DP2A)
1398          goto skip;
1399       transform_dotp(tctx, inst);
1400       break;
1401    case TGSI_OPCODE_TEX:
1402    case TGSI_OPCODE_TXP:
1403    case TGSI_OPCODE_TXB:
1404    case TGSI_OPCODE_TXB2:
1405    case TGSI_OPCODE_TXL:
1406       if (transform_samp(tctx, inst))
1407          goto skip;
1408       break;
1409    default:
1410    skip:
1411       tctx->emit_instruction(tctx, inst);
1412       break;
1413    }
1414 }
1415
1416 /* returns NULL if no lowering required, else returns the new
1417  * tokens (which caller is required to free()).  In either case
1418  * returns the current info.
1419  */
1420 const struct tgsi_token *
1421 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1422                         const struct tgsi_token *tokens,
1423                         struct tgsi_shader_info *info)
1424 {
1425    struct tgsi_lowering_context ctx;
1426    struct tgsi_token *newtoks;
1427    int newlen, numtmp;
1428
1429    /* sanity check in case limit is ever increased: */
1430    assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1431
1432    memset(&ctx, 0, sizeof(ctx));
1433    ctx.base.transform_instruction = transform_instr;
1434    ctx.info = info;
1435    ctx.config = config;
1436
1437    tgsi_scan_shader(tokens, info);
1438
1439    /* if we are adding fragment shader support to emulate two-sided
1440     * color, then figure out the number of additional inputs we need
1441     * to create for BCOLOR's..
1442     */
1443    if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
1444        config->color_two_side) {
1445       int i;
1446       ctx.face_idx = -1;
1447       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1448          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1449             ctx.two_side_idx[ctx.two_side_colors++] = i;
1450          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1451             ctx.face_idx = i;
1452       }
1453    }
1454
1455    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1456
1457 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1458    /* if there are no instructions to lower, then we are done: */
1459    if (!(OPCS(DST) ||
1460          OPCS(XPD) ||
1461          OPCS(SCS) ||
1462          OPCS(LRP) ||
1463          OPCS(FRC) ||
1464          OPCS(POW) ||
1465          OPCS(LIT) ||
1466          OPCS(EXP) ||
1467          OPCS(LOG) ||
1468          OPCS(DP4) ||
1469          OPCS(DP3) ||
1470          OPCS(DPH) ||
1471          OPCS(DP2) ||
1472          OPCS(DP2A) ||
1473          OPCS(TXP) ||
1474          ctx.two_side_colors ||
1475          ctx.saturate))
1476       return NULL;
1477
1478 #if 0  /* debug */
1479    _debug_printf("BEFORE:");
1480    tgsi_dump(tokens, 0);
1481 #endif
1482
1483    numtmp = 0;
1484    newlen = tgsi_num_tokens(tokens);
1485    if (OPCS(DST)) {
1486       newlen += DST_GROW * OPCS(DST);
1487       numtmp = MAX2(numtmp, DST_TMP);
1488    }
1489    if (OPCS(XPD)) {
1490       newlen += XPD_GROW * OPCS(XPD);
1491       numtmp = MAX2(numtmp, XPD_TMP);
1492    }
1493    if (OPCS(SCS)) {
1494       newlen += SCS_GROW * OPCS(SCS);
1495       numtmp = MAX2(numtmp, SCS_TMP);
1496    }
1497    if (OPCS(LRP)) {
1498       newlen += LRP_GROW * OPCS(LRP);
1499       numtmp = MAX2(numtmp, LRP_TMP);
1500    }
1501    if (OPCS(FRC)) {
1502       newlen += FRC_GROW * OPCS(FRC);
1503       numtmp = MAX2(numtmp, FRC_TMP);
1504    }
1505    if (OPCS(POW)) {
1506       newlen += POW_GROW * OPCS(POW);
1507       numtmp = MAX2(numtmp, POW_TMP);
1508    }
1509    if (OPCS(LIT)) {
1510       newlen += LIT_GROW * OPCS(LIT);
1511       numtmp = MAX2(numtmp, LIT_TMP);
1512    }
1513    if (OPCS(EXP)) {
1514       newlen += EXP_GROW * OPCS(EXP);
1515       numtmp = MAX2(numtmp, EXP_TMP);
1516    }
1517    if (OPCS(LOG)) {
1518       newlen += LOG_GROW * OPCS(LOG);
1519       numtmp = MAX2(numtmp, LOG_TMP);
1520    }
1521    if (OPCS(DP4)) {
1522       newlen += DP4_GROW * OPCS(DP4);
1523       numtmp = MAX2(numtmp, DOTP_TMP);
1524    }
1525    if (OPCS(DP3)) {
1526       newlen += DP3_GROW * OPCS(DP3);
1527       numtmp = MAX2(numtmp, DOTP_TMP);
1528    }
1529    if (OPCS(DPH)) {
1530       newlen += DPH_GROW * OPCS(DPH);
1531       numtmp = MAX2(numtmp, DOTP_TMP);
1532    }
1533    if (OPCS(DP2)) {
1534       newlen += DP2_GROW * OPCS(DP2);
1535       numtmp = MAX2(numtmp, DOTP_TMP);
1536    }
1537    if (OPCS(DP2A)) {
1538       newlen += DP2A_GROW * OPCS(DP2A);
1539       numtmp = MAX2(numtmp, DOTP_TMP);
1540    }
1541    if (ctx.saturate || config->lower_TXP) {
1542       int n = 0;
1543
1544       if (ctx.saturate) {
1545          n = info->opcode_count[TGSI_OPCODE_TEX] +
1546             info->opcode_count[TGSI_OPCODE_TXP] +
1547             info->opcode_count[TGSI_OPCODE_TXB] +
1548             info->opcode_count[TGSI_OPCODE_TXB2] +
1549             info->opcode_count[TGSI_OPCODE_TXL];
1550       } else if (config->lower_TXP) {
1551           n = info->opcode_count[TGSI_OPCODE_TXP];
1552       }
1553
1554       newlen += SAMP_GROW * n;
1555       numtmp = MAX2(numtmp, SAMP_TMP);
1556    }
1557
1558    /* specifically don't include two_side_colors temps in the count: */
1559    ctx.numtmp = numtmp;
1560
1561    if (ctx.two_side_colors) {
1562       newlen += TWOSIDE_GROW(ctx.two_side_colors);
1563       /* note: we permanently consume temp regs, re-writing references
1564        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1565        * instruction that selects which varying to use):
1566        */
1567       numtmp += ctx.two_side_colors;
1568    }
1569
1570    newlen += 2 * numtmp;
1571    newlen += 5;        /* immediate */
1572
1573    newtoks = tgsi_alloc_tokens(newlen);
1574    if (!newtoks)
1575       return NULL;
1576
1577    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1578
1579    tgsi_scan_shader(newtoks, info);
1580
1581 #if 0  /* debug */
1582    _debug_printf("AFTER:");
1583    tgsi_dump(newtoks, 0);
1584 #endif
1585
1586    return newtoks;
1587 }