src/gallium/auxiliary/tgsi/tgsi_lowering.c

   1 /*
   2  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "tgsi/tgsi_transform.h"
  28 #include "tgsi/tgsi_scan.h"
  29 #include "tgsi/tgsi_dump.h"
  30
  31 #include "util/u_debug.h"
  32 #include "util/u_math.h"
  33
  34 #include "tgsi_lowering.h"
  35
  36 struct tgsi_lowering_context {
  37    struct tgsi_transform_context base;
  38    const struct tgsi_lowering_config *config;
  39    struct tgsi_shader_info *info;
  40    unsigned two_side_colors;
  41    unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
  42    unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
  43    int face_idx;
  44    unsigned numtmp;
  45    struct {
  46       struct tgsi_full_src_register src;
  47       struct tgsi_full_dst_register dst;
  48    } tmp[2];
  49 #define A 0
  50 #define B 1
  51    struct tgsi_full_src_register imm;
  52    int emitted_decls;
  53    unsigned saturate;
  54 };
  55
  56 static inline struct tgsi_lowering_context *
  57 tgsi_lowering_context(struct tgsi_transform_context *tctx)
  58 {
  59    return (struct tgsi_lowering_context *)tctx;
  60 }
  61
  62 /*
  63  * Utility helpers:
  64  */
  65
  66 static void
  67 reg_dst(struct tgsi_full_dst_register *dst,
  68         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
  69 {
  70    *dst = *orig_dst;
  71    dst->Register.WriteMask &= wrmask;
  72    assert(dst->Register.WriteMask);
  73 }
  74
  75 static inline void
  76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
  77 {
  78    swiz[0] = src->SwizzleX;
  79    swiz[1] = src->SwizzleY;
  80    swiz[2] = src->SwizzleZ;
  81    swiz[3] = src->SwizzleW;
  82 }
  83
  84 static void
  85 reg_src(struct tgsi_full_src_register *src,
  86         const struct tgsi_full_src_register *orig_src,
  87         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
  88 {
  89    unsigned swiz[4];
  90    get_swiz(swiz, &orig_src->Register);
  91    *src = *orig_src;
  92    src->Register.SwizzleX = swiz[sx];
  93    src->Register.SwizzleY = swiz[sy];
  94    src->Register.SwizzleZ = swiz[sz];
  95    src->Register.SwizzleW = swiz[sw];
  96 }
  97
  98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
  99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
 100       TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
 101
 102 /*
 103  * if (dst.x aliases src.x) {
 104  *   MOV tmpA.x, src.x
 105  *   src = tmpA
 106  * }
 107  * COS dst.x, src.x
 108  * SIN dst.y, src.x
 109  * MOV dst.zw, imm{0.0, 1.0}
 110  */
 111 static bool
 112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
 113         const struct tgsi_full_src_register *src, unsigned src_mask)
 114 {
 115    if ((dst->Register.File == src->Register.File) &&
 116        (dst->Register.Index == src->Register.Index)) {
 117       unsigned i, actual_mask = 0;
 118       unsigned swiz[4];
 119       get_swiz(swiz, &src->Register);
 120       for (i = 0; i < 4; i++)
 121          if (src_mask & (1 << i))
 122             actual_mask |= (1 << swiz[i]);
 123       if (actual_mask & dst_mask)
 124          return true;
 125    }
 126    return false;
 127 }
 128
 129 static void
 130 create_mov(struct tgsi_transform_context *tctx,
 131            const struct tgsi_full_dst_register *dst,
 132            const struct tgsi_full_src_register *src,
 133            unsigned mask, unsigned saturate)
 134 {
 135    struct tgsi_full_instruction new_inst;
 136
 137    new_inst = tgsi_default_full_instruction();
 138    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 139    new_inst.Instruction.Saturate = saturate;
 140    new_inst.Instruction.NumDstRegs = 1;
 141    reg_dst(&new_inst.Dst[0], dst, mask);
 142    new_inst.Instruction.NumSrcRegs = 1;
 143    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 144    tctx->emit_instruction(tctx, &new_inst);
 145 }
 146
 147 /* to help calculate # of tgsi tokens for a lowering.. we assume
 148  * the worst case, ie. removed instructions don't have ADDR[] or
 149  * anything which increases the # of tokens per src/dst and the
 150  * inserted instructions do.
 151  *
 152  * OINST() - old instruction
 153  *    1         : instruction itself
 154  *    1         : dst
 155  *    1 * nargs : srcN
 156  *
 157  * NINST() - new instruction
 158  *    1         : instruction itself
 159  *    2         : dst
 160  *    2 * nargs : srcN
 161  */
 162
 163 #define OINST(nargs)  (1 + 1 + 1 * (nargs))
 164 #define NINST(nargs)  (1 + 2 + 2 * (nargs))
 165
 166 /*
 167  * Lowering Translators:
 168  */
 169
 170 /* DST - Distance Vector
 171  *   dst.x = 1.0
 172  *   dst.y = src0.y \times src1.y
 173  *   dst.z = src0.z
 174  *   dst.w = src1.w
 175  *
 176  * ; note: could be more clever and use just a single temp
 177  * ;       if I was clever enough to re-write the swizzles.
 178  * ; needs: 2 tmp, imm{1.0}
 179  * if (dst.y aliases src0.z) {
 180  *   MOV tmpA.yz, src0.yz
 181  *   src0 = tmpA
 182  * }
 183  * if (dst.yz aliases src1.w) {
 184  *   MOV tmpB.yw, src1.yw
 185  *   src1 = tmpB
 186  * }
 187  * MUL dst.y, src0.y, src1.y
 188  * MOV dst.z, src0.z
 189  * MOV dst.w, src1.w
 190  * MOV dst.x, imm{1.0}
 191  */
 192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
 193                 NINST(1) + NINST(1) - OINST(2))
 194 #define DST_TMP  2
 195 static void
 196 transform_dst(struct tgsi_transform_context *tctx,
 197               struct tgsi_full_instruction *inst)
 198 {
 199    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 200    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 201    struct tgsi_full_src_register *src0 = &inst->Src[0];
 202    struct tgsi_full_src_register *src1 = &inst->Src[1];
 203    struct tgsi_full_instruction new_inst;
 204
 205    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
 206       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
 207       src0 = &ctx->tmp[A].src;
 208    }
 209
 210    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
 211       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
 212       src1 = &ctx->tmp[B].src;
 213    }
 214
 215    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 216       /* MUL dst.y, src0.y, src1.y */
 217       new_inst = tgsi_default_full_instruction();
 218       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 219       new_inst.Instruction.NumDstRegs = 1;
 220       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 221       new_inst.Instruction.NumSrcRegs = 2;
 222       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
 223       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
 224       tctx->emit_instruction(tctx, &new_inst);
 225    }
 226
 227    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 228       /* MOV dst.z, src0.z */
 229       new_inst = tgsi_default_full_instruction();
 230       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 231       new_inst.Instruction.NumDstRegs = 1;
 232       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 233       new_inst.Instruction.NumSrcRegs = 1;
 234       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
 235       tctx->emit_instruction(tctx, &new_inst);
 236    }
 237
 238    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 239       /* MOV dst.w, src1.w */
 240       new_inst = tgsi_default_full_instruction();
 241       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 242       new_inst.Instruction.NumDstRegs = 1;
 243       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 244       new_inst.Instruction.NumSrcRegs = 1;
 245       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
 246       tctx->emit_instruction(tctx, &new_inst);
 247    }
 248
 249    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 250       /* MOV dst.x, imm{1.0} */
 251       new_inst = tgsi_default_full_instruction();
 252       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 253       new_inst.Instruction.NumDstRegs = 1;
 254       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 255       new_inst.Instruction.NumSrcRegs = 1;
 256       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
 257       tctx->emit_instruction(tctx, &new_inst);
 258    }
 259 }
 260
 261 /* LRP - Linear Interpolate
 262  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
 263  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
 264  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
 265  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
 266  *
 267  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
 268  * can then become: src0 \times src1 - (src0 \times src2 - src2)
 269  *
 270  * ; needs: 1 tmp
 271  * MAD tmpA, src0, src2, -src2
 272  * MAD dst, src0, src1, -tmpA
 273  */
 274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
 275 #define LRP_TMP  1
 276 static void
 277 transform_lrp(struct tgsi_transform_context *tctx,
 278               struct tgsi_full_instruction *inst)
 279 {
 280    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 281    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 282    struct tgsi_full_src_register *src0 = &inst->Src[0];
 283    struct tgsi_full_src_register *src1 = &inst->Src[1];
 284    struct tgsi_full_src_register *src2 = &inst->Src[2];
 285    struct tgsi_full_instruction new_inst;
 286
 287    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 288       /* MAD tmpA, src0, src2, -src2 */
 289       new_inst = tgsi_default_full_instruction();
 290       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 291       new_inst.Instruction.NumDstRegs = 1;
 292       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 293       new_inst.Instruction.NumSrcRegs = 3;
 294       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 295       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
 296       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
 297       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
 298       tctx->emit_instruction(tctx, &new_inst);
 299
 300       /* MAD dst, src0, src1, -tmpA */
 301       new_inst = tgsi_default_full_instruction();
 302       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 303       new_inst.Instruction.NumDstRegs = 1;
 304       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 305       new_inst.Instruction.NumSrcRegs = 3;
 306       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 307       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
 308       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 309       new_inst.Src[2].Register.Negate = true;
 310       tctx->emit_instruction(tctx, &new_inst);
 311    }
 312 }
 313
 314 /* FRC - Fraction
 315  *  dst.x = src.x - \lfloor src.x\rfloor
 316  *  dst.y = src.y - \lfloor src.y\rfloor
 317  *  dst.z = src.z - \lfloor src.z\rfloor
 318  *  dst.w = src.w - \lfloor src.w\rfloor
 319  *
 320  * ; needs: 1 tmp
 321  * FLR tmpA, src
 322  * SUB dst, src, tmpA
 323  */
 324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
 325 #define FRC_TMP  1
 326 static void
 327 transform_frc(struct tgsi_transform_context *tctx,
 328               struct tgsi_full_instruction *inst)
 329 {
 330    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 331    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 332    struct tgsi_full_src_register *src = &inst->Src[0];
 333    struct tgsi_full_instruction new_inst;
 334
 335    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 336       /* FLR tmpA, src */
 337       new_inst = tgsi_default_full_instruction();
 338       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 339       new_inst.Instruction.NumDstRegs = 1;
 340       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 341       new_inst.Instruction.NumSrcRegs = 1;
 342       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 343       tctx->emit_instruction(tctx, &new_inst);
 344
 345       /* SUB dst, src, tmpA */
 346       new_inst = tgsi_default_full_instruction();
 347       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 348       new_inst.Instruction.NumDstRegs = 1;
 349       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 350       new_inst.Instruction.NumSrcRegs = 2;
 351       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
 352       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 353       new_inst.Src[1].Register.Negate = 1;
 354       tctx->emit_instruction(tctx, &new_inst);
 355    }
 356 }
 357
 358 /* POW - Power
 359  *  dst.x = src0.x^{src1.x}
 360  *  dst.y = src0.x^{src1.x}
 361  *  dst.z = src0.x^{src1.x}
 362  *  dst.w = src0.x^{src1.x}
 363  *
 364  * ; needs: 1 tmp
 365  * LG2 tmpA.x, src0.x
 366  * MUL tmpA.x, src1.x, tmpA.x
 367  * EX2 dst, tmpA.x
 368  */
 369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
 370 #define POW_TMP  1
 371 static void
 372 transform_pow(struct tgsi_transform_context *tctx,
 373               struct tgsi_full_instruction *inst)
 374 {
 375    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 376    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 377    struct tgsi_full_src_register *src0 = &inst->Src[0];
 378    struct tgsi_full_src_register *src1 = &inst->Src[1];
 379    struct tgsi_full_instruction new_inst;
 380
 381    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 382       /* LG2 tmpA.x, src0.x */
 383       new_inst = tgsi_default_full_instruction();
 384       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 385       new_inst.Instruction.NumDstRegs = 1;
 386       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 387       new_inst.Instruction.NumSrcRegs = 1;
 388       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
 389       tctx->emit_instruction(tctx, &new_inst);
 390
 391       /* MUL tmpA.x, src1.x, tmpA.x */
 392       new_inst = tgsi_default_full_instruction();
 393       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 394       new_inst.Instruction.NumDstRegs = 1;
 395       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 396       new_inst.Instruction.NumSrcRegs = 2;
 397       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
 398       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 399       tctx->emit_instruction(tctx, &new_inst);
 400
 401       /* EX2 dst, tmpA.x */
 402       new_inst = tgsi_default_full_instruction();
 403       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 404       new_inst.Instruction.NumDstRegs = 1;
 405       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 406       new_inst.Instruction.NumSrcRegs = 1;
 407       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 408       tctx->emit_instruction(tctx, &new_inst);
 409    }
 410 }
 411
 412 /* LIT - Light Coefficients
 413  *  dst.x = 1.0
 414  *  dst.y = max(src.x, 0.0)
 415  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 416  *  dst.w = 1.0
 417  *
 418  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
 419  * MAX tmpA.xy, src.xy, imm{0.0}
 420  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
 421  * LG2 tmpA.y, tmpA.y
 422  * MUL tmpA.y, tmpA.z, tmpA.y
 423  * EX2 tmpA.y, tmpA.y
 424  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
 425  * MOV dst.yz, tmpA.xy
 426  * MOV dst.xw, imm{1.0}
 427  */
 428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
 429                 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
 430 #define LIT_TMP  1
 431 static void
 432 transform_lit(struct tgsi_transform_context *tctx,
 433               struct tgsi_full_instruction *inst)
 434 {
 435    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 436    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 437    struct tgsi_full_src_register *src = &inst->Src[0];
 438    struct tgsi_full_instruction new_inst;
 439
 440    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
 441       /* MAX tmpA.xy, src.xy, imm{0.0} */
 442       new_inst = tgsi_default_full_instruction();
 443       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
 444       new_inst.Instruction.NumDstRegs = 1;
 445       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
 446       new_inst.Instruction.NumSrcRegs = 2;
 447       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
 448       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
 449       tctx->emit_instruction(tctx, &new_inst);
 450
 451       /* MIN tmpA.z, src.w, imm{128.0} */
 452       new_inst = tgsi_default_full_instruction();
 453       new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
 454       new_inst.Instruction.NumDstRegs = 1;
 455       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 456       new_inst.Instruction.NumSrcRegs = 2;
 457       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
 458       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
 459       tctx->emit_instruction(tctx, &new_inst);
 460
 461       /* MAX tmpA.z, tmpA.z, -imm{128.0} */
 462       new_inst = tgsi_default_full_instruction();
 463       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
 464       new_inst.Instruction.NumDstRegs = 1;
 465       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 466       new_inst.Instruction.NumSrcRegs = 2;
 467       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
 468       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
 469       new_inst.Src[1].Register.Negate = true;
 470       tctx->emit_instruction(tctx, &new_inst);
 471
 472       /* LG2 tmpA.y, tmpA.y */
 473       new_inst = tgsi_default_full_instruction();
 474       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 475       new_inst.Instruction.NumDstRegs = 1;
 476       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 477       new_inst.Instruction.NumSrcRegs = 1;
 478       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 479       tctx->emit_instruction(tctx, &new_inst);
 480
 481       /* MUL tmpA.y, tmpA.z, tmpA.y */
 482       new_inst = tgsi_default_full_instruction();
 483       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 484       new_inst.Instruction.NumDstRegs = 1;
 485       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 486       new_inst.Instruction.NumSrcRegs = 2;
 487       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
 488       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
 489       tctx->emit_instruction(tctx, &new_inst);
 490
 491       /* EX2 tmpA.y, tmpA.y */
 492       new_inst = tgsi_default_full_instruction();
 493       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 494       new_inst.Instruction.NumDstRegs = 1;
 495       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 496       new_inst.Instruction.NumSrcRegs = 1;
 497       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 498       tctx->emit_instruction(tctx, &new_inst);
 499
 500       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
 501       new_inst = tgsi_default_full_instruction();
 502       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
 503       new_inst.Instruction.NumDstRegs = 1;
 504       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 505       new_inst.Instruction.NumSrcRegs = 3;
 506       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 507       new_inst.Src[0].Register.Negate = true;
 508       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
 509       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
 510       tctx->emit_instruction(tctx, &new_inst);
 511
 512       /* MOV dst.yz, tmpA.xy */
 513       new_inst = tgsi_default_full_instruction();
 514       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 515       new_inst.Instruction.NumDstRegs = 1;
 516       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
 517       new_inst.Instruction.NumSrcRegs = 1;
 518       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
 519       tctx->emit_instruction(tctx, &new_inst);
 520    }
 521
 522    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
 523       /* MOV dst.xw, imm{1.0} */
 524       new_inst = tgsi_default_full_instruction();
 525       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 526       new_inst.Instruction.NumDstRegs = 1;
 527       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
 528       new_inst.Instruction.NumSrcRegs = 1;
 529       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
 530       tctx->emit_instruction(tctx, &new_inst);
 531    }
 532 }
 533
 534 /* EXP - Approximate Exponential Base 2
 535  *  dst.x = 2^{\lfloor src.x\rfloor}
 536  *  dst.y = src.x - \lfloor src.x\rfloor
 537  *  dst.z = 2^{src.x}
 538  *  dst.w = 1.0
 539  *
 540  * ; needs: 1 tmp, imm{1.0}
 541  * if (lowering FLR) {
 542  *   FRC tmpA.x, src.x
 543  *   SUB tmpA.x, src.x, tmpA.x
 544  * } else {
 545  *   FLR tmpA.x, src.x
 546  * }
 547  * EX2 tmpA.y, src.x
 548  * SUB dst.y, src.x, tmpA.x
 549  * EX2 dst.x, tmpA.x
 550  * MOV dst.z, tmpA.y
 551  * MOV dst.w, imm{1.0}
 552  */
 553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
 554                 NINST(1)+ NINST(1) - OINST(1))
 555 #define EXP_TMP  1
 556 static void
 557 transform_exp(struct tgsi_transform_context *tctx,
 558               struct tgsi_full_instruction *inst)
 559 {
 560    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 561    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 562    struct tgsi_full_src_register *src = &inst->Src[0];
 563    struct tgsi_full_instruction new_inst;
 564
 565    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 566       if (ctx->config->lower_FLR) {
 567          /* FRC tmpA.x, src.x */
 568          new_inst = tgsi_default_full_instruction();
 569          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
 570          new_inst.Instruction.NumDstRegs = 1;
 571          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 572          new_inst.Instruction.NumSrcRegs = 1;
 573          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 574          tctx->emit_instruction(tctx, &new_inst);
 575
 576          /* SUB tmpA.x, src.x, tmpA.x */
 577          new_inst = tgsi_default_full_instruction();
 578          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 579          new_inst.Instruction.NumDstRegs = 1;
 580          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 581          new_inst.Instruction.NumSrcRegs = 2;
 582          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 583          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 584          new_inst.Src[1].Register.Negate = 1;
 585          tctx->emit_instruction(tctx, &new_inst);
 586      } else {
 587          /* FLR tmpA.x, src.x */
 588          new_inst = tgsi_default_full_instruction();
 589          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 590          new_inst.Instruction.NumDstRegs = 1;
 591          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 592          new_inst.Instruction.NumSrcRegs = 1;
 593          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 594          tctx->emit_instruction(tctx, &new_inst);
 595       }
 596    }
 597
 598    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 599       /* EX2 tmpA.y, src.x */
 600       new_inst = tgsi_default_full_instruction();
 601       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 602       new_inst.Instruction.NumDstRegs = 1;
 603       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 604       new_inst.Instruction.NumSrcRegs = 1;
 605       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 606       tctx->emit_instruction(tctx, &new_inst);
 607    }
 608
 609    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 610       /* SUB dst.y, src.x, tmpA.x */
 611       new_inst = tgsi_default_full_instruction();
 612       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 613       new_inst.Instruction.NumDstRegs = 1;
 614       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 615       new_inst.Instruction.NumSrcRegs = 2;
 616       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 617       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 618       new_inst.Src[1].Register.Negate = 1;
 619       tctx->emit_instruction(tctx, &new_inst);
 620    }
 621
 622    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
 623       /* EX2 dst.x, tmpA.x */
 624       new_inst = tgsi_default_full_instruction();
 625       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 626       new_inst.Instruction.NumDstRegs = 1;
 627       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
 628       new_inst.Instruction.NumSrcRegs = 1;
 629       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
 630       tctx->emit_instruction(tctx, &new_inst);
 631    }
 632
 633    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
 634       /* MOV dst.z, tmpA.y */
 635       new_inst = tgsi_default_full_instruction();
 636       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 637       new_inst.Instruction.NumDstRegs = 1;
 638       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
 639       new_inst.Instruction.NumSrcRegs = 1;
 640       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
 641       tctx->emit_instruction(tctx, &new_inst);
 642    }
 643
 644    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 645       /* MOV dst.w, imm{1.0} */
 646       new_inst = tgsi_default_full_instruction();
 647       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 648       new_inst.Instruction.NumDstRegs = 1;
 649       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 650       new_inst.Instruction.NumSrcRegs = 1;
 651       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
 652       tctx->emit_instruction(tctx, &new_inst);
 653    }
 654 }
 655
 656 /* LOG - Approximate Logarithm Base 2
 657  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 658  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 659  *  dst.z = \log_2{|src.x|}
 660  *  dst.w = 1.0
 661  *
 662  * ; needs: 1 tmp, imm{1.0}
 663  * LG2 tmpA.x, |src.x|
 664  * if (lowering FLR) {
 665  *   FRC tmpA.y, tmpA.x
 666  *   SUB tmpA.y, tmpA.x, tmpA.y
 667  * } else {
 668  *   FLR tmpA.y, tmpA.x
 669  * }
 670  * EX2 tmpA.z, tmpA.y
 671  * RCP tmpA.z, tmpA.z
 672  * MUL dst.y, |src.x|, tmpA.z
 673  * MOV dst.xz, tmpA.yx
 674  * MOV dst.w, imm{1.0}
 675  */
 676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
 677                 NINST(2) + NINST(1) + NINST(1) - OINST(1))
 678 #define LOG_TMP  1
 679 static void
 680 transform_log(struct tgsi_transform_context *tctx,
 681               struct tgsi_full_instruction *inst)
 682 {
 683    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 684    struct tgsi_full_dst_register *dst = &inst->Dst[0];
 685    struct tgsi_full_src_register *src = &inst->Src[0];
 686    struct tgsi_full_instruction new_inst;
 687
 688    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
 689       /* LG2 tmpA.x, |src.x| */
 690       new_inst = tgsi_default_full_instruction();
 691       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
 692       new_inst.Instruction.NumDstRegs = 1;
 693       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 694       new_inst.Instruction.NumSrcRegs = 1;
 695       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
 696       new_inst.Src[0].Register.Absolute = true;
 697       tctx->emit_instruction(tctx, &new_inst);
 698    }
 699
 700    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
 701       if (ctx->config->lower_FLR) {
 702          /* FRC tmpA.y, tmpA.x */
 703          new_inst = tgsi_default_full_instruction();
 704          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
 705          new_inst.Instruction.NumDstRegs = 1;
 706          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 707          new_inst.Instruction.NumSrcRegs = 1;
 708          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 709          tctx->emit_instruction(tctx, &new_inst);
 710
 711          /* SUB tmpA.y, tmpA.x, tmpA.y */
 712          new_inst = tgsi_default_full_instruction();
 713          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 714          new_inst.Instruction.NumDstRegs = 1;
 715          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 716          new_inst.Instruction.NumSrcRegs = 2;
 717          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 718          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
 719          new_inst.Src[1].Register.Negate = 1;
 720          tctx->emit_instruction(tctx, &new_inst);
 721       } else {
 722          /* FLR tmpA.y, tmpA.x */
 723          new_inst = tgsi_default_full_instruction();
 724          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 725          new_inst.Instruction.NumDstRegs = 1;
 726          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
 727          new_inst.Instruction.NumSrcRegs = 1;
 728          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
 729          tctx->emit_instruction(tctx, &new_inst);
 730       }
 731    }
 732
 733    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
 734       /* EX2 tmpA.z, tmpA.y */
 735       new_inst = tgsi_default_full_instruction();
 736       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
 737       new_inst.Instruction.NumDstRegs = 1;
 738       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 739       new_inst.Instruction.NumSrcRegs = 1;
 740       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
 741       tctx->emit_instruction(tctx, &new_inst);
 742
 743       /* RCP tmpA.z, tmpA.z */
 744       new_inst = tgsi_default_full_instruction();
 745       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
 746       new_inst.Instruction.NumDstRegs = 1;
 747       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
 748       new_inst.Instruction.NumSrcRegs = 1;
 749       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
 750       tctx->emit_instruction(tctx, &new_inst);
 751
 752       /* MUL dst.y, |src.x|, tmpA.z */
 753       new_inst = tgsi_default_full_instruction();
 754       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 755       new_inst.Instruction.NumDstRegs = 1;
 756       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
 757       new_inst.Instruction.NumSrcRegs = 2;
 758       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
 759       new_inst.Src[0].Register.Absolute = true;
 760       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
 761       tctx->emit_instruction(tctx, &new_inst);
 762    }
 763
 764    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
 765       /* MOV dst.xz, tmpA.yx */
 766       new_inst = tgsi_default_full_instruction();
 767       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 768       new_inst.Instruction.NumDstRegs = 1;
 769       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
 770       new_inst.Instruction.NumSrcRegs = 1;
 771       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
 772       tctx->emit_instruction(tctx, &new_inst);
 773    }
 774
 775    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
 776       /* MOV dst.w, imm{1.0} */
 777       new_inst = tgsi_default_full_instruction();
 778       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
 779       new_inst.Instruction.NumDstRegs = 1;
 780       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
 781       new_inst.Instruction.NumSrcRegs = 1;
 782       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
 783       tctx->emit_instruction(tctx, &new_inst);
 784    }
 785 }
 786
 787 /* DP4 - 4-component Dot Product
 788  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 789  *
 790  * DP3 - 3-component Dot Product
 791  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 792  *
 793  * DP2 - 2-component Dot Product
 794  *   dst = src0.x \times src1.x + src0.y \times src1.y
 795  *
 * NOTE: these are translated into a sequence of MUL/MAD(/ADD) scalar
 * operations, which is what you'd prefer for an ISA that is natively
 * scalar.  Probably a native vector ISA would at least already have
 * DP4/DP3 instructions, but perhaps there is room for an alternative
 * translation for DP2 using vector instructions.
 801  *
 802  * ; needs: 1 tmp
 803  * MUL tmpA.x, src0.x, src1.x
 804  * MAD tmpA.x, src0.y, src1.y, tmpA.x
 805  * if (DP3 || DP4) {
 806  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
 807  *   if (DP4) {
 808  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
 809  *   }
 810  * }
 811  * ; fixup last instruction to replicate into dst
 812  */
 813 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
 814 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
 815 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
 816 #define DOTP_TMP  1
 817 static void
 818 transform_dotp(struct tgsi_transform_context *tctx,
 819                struct tgsi_full_instruction *inst)
 820 {
 821    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 822    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 823    struct tgsi_full_src_register *src0 = &inst->Src[0];
 824    struct tgsi_full_src_register *src1 = &inst->Src[1];
 825    struct tgsi_full_instruction new_inst;
 826    enum tgsi_opcode opcode = inst->Instruction.Opcode;
 827
 828    /* NOTE: any potential last instruction must replicate src on all
 829     * components (since it could be re-written to write to final dst)
 830     */
 831
 832    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 833       /* MUL tmpA.x, src0.x, src1.x */
 834       new_inst = tgsi_default_full_instruction();
 835       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
 836       new_inst.Instruction.NumDstRegs = 1;
 837       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 838       new_inst.Instruction.NumSrcRegs = 2;
 839       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
 840       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
 841       tctx->emit_instruction(tctx, &new_inst);
 842
 843       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
 844       new_inst = tgsi_default_full_instruction();
 845       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 846       new_inst.Instruction.NumDstRegs = 1;
 847       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 848       new_inst.Instruction.NumSrcRegs = 3;
 849       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
 850       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
 851       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 852
 853       if ((opcode == TGSI_OPCODE_DP3) ||
 854           (opcode == TGSI_OPCODE_DP4)) {
 855          tctx->emit_instruction(tctx, &new_inst);
 856
 857          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
 858          new_inst = tgsi_default_full_instruction();
 859          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 860          new_inst.Instruction.NumDstRegs = 1;
 861          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 862          new_inst.Instruction.NumSrcRegs = 3;
 863          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
 864          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
 865          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 866
 867          if (opcode == TGSI_OPCODE_DP4) {
 868             tctx->emit_instruction(tctx, &new_inst);
 869
 870             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
 871             new_inst = tgsi_default_full_instruction();
 872             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
 873             new_inst.Instruction.NumDstRegs = 1;
 874             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
 875             new_inst.Instruction.NumSrcRegs = 3;
 876             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
 877             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
 878             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
 879          }
 880       }
 881
 882       /* fixup last instruction to write to dst: */
 883       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 884
 885       tctx->emit_instruction(tctx, &new_inst);
 886    }
 887 }
 888
 889 /* FLR - floor, CEIL - ceil
 890  * ; needs: 1 tmp
 891  * if (CEIL) {
 892  *   FRC tmpA, -src
 893  *   ADD dst, src, tmpA
 894  * } else {
 895  *   FRC tmpA, src
 896  *   SUB dst, src, tmpA
 897  * }
 898  */
 899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
 900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
 901 #define FLR_TMP 1
 902 #define CEIL_TMP 1
 903 static void
 904 transform_flr_ceil(struct tgsi_transform_context *tctx,
 905                    struct tgsi_full_instruction *inst)
 906 {
 907    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 908    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 909    struct tgsi_full_src_register *src0 = &inst->Src[0];
 910    struct tgsi_full_instruction new_inst;
 911    enum tgsi_opcode opcode = inst->Instruction.Opcode;
 912
 913    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 914       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
 915       new_inst = tgsi_default_full_instruction();
 916       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
 917       new_inst.Instruction.NumDstRegs = 1;
 918       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 919       new_inst.Instruction.NumSrcRegs = 1;
 920       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 921
 922       if (opcode == TGSI_OPCODE_CEIL)
 923          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
 924       tctx->emit_instruction(tctx, &new_inst);
 925
 926       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
 927       new_inst = tgsi_default_full_instruction();
 928       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 929       new_inst.Instruction.NumDstRegs = 1;
 930       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
 931       new_inst.Instruction.NumSrcRegs = 2;
 932       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 933       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 934       if (opcode == TGSI_OPCODE_FLR)
 935          new_inst.Src[1].Register.Negate = 1;
 936       tctx->emit_instruction(tctx, &new_inst);
 937    }
 938 }
 939
 940 /* TRUNC - truncate off fractional part
 941  *  dst.x = trunc(src.x)
 942  *  dst.y = trunc(src.y)
 943  *  dst.z = trunc(src.z)
 944  *  dst.w = trunc(src.w)
 945  *
 946  * ; needs: 1 tmp
 947  * if (lower FLR) {
 948  *   FRC tmpA, |src|
 949  *   SUB tmpA, |src|, tmpA
 950  * } else {
 951  *   FLR tmpA, |src|
 952  * }
 953  * CMP dst, src, -tmpA, tmpA
 954  */
 955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
 956 #define TRUNC_TMP 1
 957 static void
 958 transform_trunc(struct tgsi_transform_context *tctx,
 959                 struct tgsi_full_instruction *inst)
 960 {
 961    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
 962    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
 963    struct tgsi_full_src_register *src0 = &inst->Src[0];
 964    struct tgsi_full_instruction new_inst;
 965
 966    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
 967       if (ctx->config->lower_FLR) {
 968          new_inst = tgsi_default_full_instruction();
 969          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
 970          new_inst.Instruction.NumDstRegs = 1;
 971          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 972          new_inst.Instruction.NumSrcRegs = 1;
 973          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 974          new_inst.Src[0].Register.Absolute = true;
 975          new_inst.Src[0].Register.Negate = false;
 976          tctx->emit_instruction(tctx, &new_inst);
 977
 978          new_inst = tgsi_default_full_instruction();
 979          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
 980          new_inst.Instruction.NumDstRegs = 1;
 981          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 982          new_inst.Instruction.NumSrcRegs = 2;
 983          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 984          new_inst.Src[0].Register.Absolute = true;
 985          new_inst.Src[0].Register.Negate = false;
 986          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
 987          new_inst.Src[1].Register.Negate = 1;
 988          tctx->emit_instruction(tctx, &new_inst);
 989       } else {
 990          new_inst = tgsi_default_full_instruction();
 991          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
 992          new_inst.Instruction.NumDstRegs = 1;
 993          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
 994          new_inst.Instruction.NumSrcRegs = 1;
 995          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
 996          new_inst.Src[0].Register.Absolute = true;
 997          new_inst.Src[0].Register.Negate = false;
 998          tctx->emit_instruction(tctx, &new_inst);
 999       }
1000
1001       new_inst = tgsi_default_full_instruction();
1002       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1003       new_inst.Instruction.NumDstRegs = 1;
1004       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1005       new_inst.Instruction.NumSrcRegs = 3;
1006       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1007       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1008       new_inst.Src[1].Register.Negate = true;
1009       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1010       tctx->emit_instruction(tctx, &new_inst);
1011    }
1012 }
1013
1014 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1015  * in the case of TXP, the clamping must happen *after* projection, so
1016  * we need to lower TXP to TEX.
1017  *
1018  *   MOV tmpA, src0
1019  *   if (opc == TXP) {
1020  *     ; do perspective division manually before clamping:
 *     RCP tmpB.x, tmpA.w
 *     MUL tmpA.<pmask>, tmpA, tmpB.xxxx
1023  *     opc = TEX;
1024  *   }
1025  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1026  *   <opc> dst, tmpA, ...
1027  */
1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1029 #define SAMP_TMP  2
1030 static int
1031 transform_samp(struct tgsi_transform_context *tctx,
1032                struct tgsi_full_instruction *inst)
1033 {
1034    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1035    struct tgsi_full_src_register *coord = &inst->Src[0];
1036    struct tgsi_full_src_register *samp;
1037    struct tgsi_full_instruction new_inst;
1038    /* mask is clamped coords, pmask is all coords (for projection): */
1039    unsigned mask = 0, pmask = 0, smask;
1040    unsigned tex = inst->Texture.Texture;
1041    enum tgsi_opcode opcode = inst->Instruction.Opcode;
1042    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1043                    (ctx->config->lower_TXP & (1 << tex));
1044
1045    if (opcode == TGSI_OPCODE_TXB2) {
1046       samp = &inst->Src[2];
1047    } else {
1048       samp = &inst->Src[1];
1049    }
1050
1051    /* convert sampler # to bitmask to test: */
1052    smask = 1 << samp->Register.Index;
1053
1054    /* check if we actually need to lower this one: */
1055    if (!(ctx->saturate & smask) && !lower_txp)
1056       return -1;
1057
1058    /* figure out which coordinates need saturating:
1059     *   - RECT textures should not get saturated
1060     *   - array index coords should not get saturated
1061     */
1062    switch (tex) {
1063    case TGSI_TEXTURE_3D:
1064    case TGSI_TEXTURE_CUBE:
1065    case TGSI_TEXTURE_CUBE_ARRAY:
1066    case TGSI_TEXTURE_SHADOWCUBE:
1067    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1068       if (ctx->config->saturate_r & smask)
1069          mask |= TGSI_WRITEMASK_Z;
1070       pmask |= TGSI_WRITEMASK_Z;
1071       /* fallthrough */
1072
1073    case TGSI_TEXTURE_2D:
1074    case TGSI_TEXTURE_2D_ARRAY:
1075    case TGSI_TEXTURE_SHADOW2D:
1076    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1077    case TGSI_TEXTURE_2D_MSAA:
1078    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1079       if (ctx->config->saturate_t & smask)
1080          mask |= TGSI_WRITEMASK_Y;
1081       pmask |= TGSI_WRITEMASK_Y;
1082       /* fallthrough */
1083
1084    case TGSI_TEXTURE_1D:
1085    case TGSI_TEXTURE_1D_ARRAY:
1086    case TGSI_TEXTURE_SHADOW1D:
1087    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1088       if (ctx->config->saturate_s & smask)
1089          mask |= TGSI_WRITEMASK_X;
1090       pmask |= TGSI_WRITEMASK_X;
1091       break;
1092
1093    case TGSI_TEXTURE_RECT:
1094    case TGSI_TEXTURE_SHADOWRECT:
1095       /* we don't saturate, but in case of lower_txp we
1096        * still need to do the perspective divide:
1097        */
1098        pmask = TGSI_WRITEMASK_XY;
1099        break;
1100    }
1101
1102    /* sanity check.. driver could be asking to saturate a non-
1103     * existent coordinate component:
1104     */
1105    if (!mask && !lower_txp)
1106       return -1;
1107
1108    /* MOV tmpA, src0 */
1109    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1110
1111    /* This is a bit sad.. we need to clamp *after* the coords
1112     * are projected, which means lowering TXP to TEX and doing
1113     * the projection ourself.  But since I haven't figured out
1114     * how to make the lowering code deliver an electric shock
1115     * to anyone using GL_CLAMP, we must do this instead:
1116     */
1117    if (opcode == TGSI_OPCODE_TXP) {
1118       /* RCP tmpB.x tmpA.w */
1119       new_inst = tgsi_default_full_instruction();
1120       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1121       new_inst.Instruction.NumDstRegs = 1;
1122       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1123       new_inst.Instruction.NumSrcRegs = 1;
1124       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1125       tctx->emit_instruction(tctx, &new_inst);
1126
1127       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1128       new_inst = tgsi_default_full_instruction();
1129       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1130       new_inst.Instruction.NumDstRegs = 1;
1131       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1132       new_inst.Instruction.NumSrcRegs = 2;
1133       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1134       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1135       tctx->emit_instruction(tctx, &new_inst);
1136
1137       opcode = TGSI_OPCODE_TEX;
1138    }
1139
1140    /* MOV_SAT tmpA.<mask>, tmpA */
1141    if (mask) {
1142       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1143    }
1144
1145    /* modify the texture samp instruction to take fixed up coord: */
1146    new_inst = *inst;
1147    new_inst.Instruction.Opcode = opcode;
1148    new_inst.Src[0] = ctx->tmp[A].src;
1149    tctx->emit_instruction(tctx, &new_inst);
1150
1151    return 0;
1152 }
1153
1154 /* Two-sided color emulation:
1155  * For each COLOR input, create a corresponding BCOLOR input, plus
1156  * CMP instruction to select front or back color based on FACE
1157  */
1158 #define TWOSIDE_GROW(n)  (                      \
1159       2 +         /* FACE */                    \
1160       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1161       ((n) * 1) + /* TEMP[] */                  \
1162       ((n) * NINST(3))   /* CMP instr */        \
1163       )
1164
1165 static void
1166 emit_twoside(struct tgsi_transform_context *tctx)
1167 {
1168    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1169    struct tgsi_shader_info *info = ctx->info;
1170    struct tgsi_full_declaration decl;
1171    struct tgsi_full_instruction new_inst;
1172    unsigned inbase, tmpbase;
1173    unsigned i;
1174
1175    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1176    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1177
1178    /* additional inputs for BCOLOR's */
1179    for (i = 0; i < ctx->two_side_colors; i++) {
1180       unsigned in_idx = ctx->two_side_idx[i];
1181       decl = tgsi_default_full_declaration();
1182       decl.Declaration.File = TGSI_FILE_INPUT;
1183       decl.Declaration.Semantic = true;
1184       decl.Range.First = decl.Range.Last = inbase + i;
1185       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1186       decl.Semantic.Index = info->input_semantic_index[in_idx];
1187       decl.Declaration.Interpolate = true;
1188       decl.Interp.Interpolate = info->input_interpolate[in_idx];
1189       decl.Interp.Location = info->input_interpolate_loc[in_idx];
1190       decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1191       tctx->emit_declaration(tctx, &decl);
1192    }
1193
1194    /* additional input for FACE */
1195    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196       decl = tgsi_default_full_declaration();
1197       decl.Declaration.File = TGSI_FILE_INPUT;
1198       decl.Declaration.Semantic = true;
1199       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201       decl.Semantic.Index = 0;
1202       tctx->emit_declaration(tctx, &decl);
1203
1204       ctx->face_idx = decl.Range.First;
1205    }
1206
1207    /* additional temps for COLOR/BCOLOR selection: */
1208    for (i = 0; i < ctx->two_side_colors; i++) {
1209       decl = tgsi_default_full_declaration();
1210       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212       tctx->emit_declaration(tctx, &decl);
1213    }
1214
1215    /* and finally additional instructions to select COLOR/BCOLOR: */
1216    for (i = 0; i < ctx->two_side_colors; i++) {
1217       new_inst = tgsi_default_full_instruction();
1218       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219
1220       new_inst.Instruction.NumDstRegs = 1;
1221       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224
1225       new_inst.Instruction.NumSrcRegs = 3;
1226       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227       new_inst.Src[0].Register.Index = ctx->face_idx;
1228       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233       new_inst.Src[1].Register.Index = inbase + i;
1234       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244
1245       tctx->emit_instruction(tctx, &new_inst);
1246    }
1247 }
1248
1249 static void
1250 emit_decls(struct tgsi_transform_context *tctx)
1251 {
1252    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253    struct tgsi_shader_info *info = ctx->info;
1254    struct tgsi_full_declaration decl;
1255    struct tgsi_full_immediate immed;
1256    unsigned tmpbase;
1257    unsigned i;
1258
1259    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260
1261    ctx->color_base = tmpbase + ctx->numtmp;
1262
1263    /* declare immediate: */
1264    immed = tgsi_default_full_immediate();
1265    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266    immed.u[0].Float = 0.0;
1267    immed.u[1].Float = 1.0;
1268    immed.u[2].Float = 128.0;
1269    immed.u[3].Float = 0.0;
1270    tctx->emit_immediate(tctx, &immed);
1271
1272    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273    ctx->imm.Register.Index = info->immediate_count;
1274    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278
1279    /* declare temp regs: */
1280    for (i = 0; i < ctx->numtmp; i++) {
1281       decl = tgsi_default_full_declaration();
1282       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283       decl.Range.First = decl.Range.Last = tmpbase + i;
1284       tctx->emit_declaration(tctx, &decl);
1285
1286       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287       ctx->tmp[i].src.Register.Index = tmpbase + i;
1288       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292
1293       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294       ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296    }
1297
1298    if (ctx->two_side_colors)
1299       emit_twoside(tctx);
1300 }
1301
1302 static void
1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
1304                     struct tgsi_full_instruction *inst)
1305 {
1306    unsigned i, j;
1307    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308       struct tgsi_src_register *src = &inst->Src[i].Register;
1309       if (src->File == TGSI_FILE_INPUT) {
1310          for (j = 0; j < ctx->two_side_colors; j++) {
1311             if (src->Index == (int)ctx->two_side_idx[j]) {
1312                src->File = TGSI_FILE_TEMPORARY;
1313                src->Index = ctx->color_base + j;
1314                break;
1315             }
1316          }
1317       }
1318    }
1319
1320 }
1321
1322 static void
1323 transform_instr(struct tgsi_transform_context *tctx,
1324                 struct tgsi_full_instruction *inst)
1325 {
1326    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327
1328    if (!ctx->emitted_decls) {
1329       emit_decls(tctx);
1330       ctx->emitted_decls = 1;
1331    }
1332
1333    /* if emulating two-sided-color, we need to re-write some
1334     * src registers:
1335     */
1336    if (ctx->two_side_colors)
1337       rename_color_inputs(ctx, inst);
1338
1339    switch (inst->Instruction.Opcode) {
1340    case TGSI_OPCODE_DST:
1341       if (!ctx->config->lower_DST)
1342          goto skip;
1343       transform_dst(tctx, inst);
1344       break;
1345    case TGSI_OPCODE_LRP:
1346       if (!ctx->config->lower_LRP)
1347          goto skip;
1348       transform_lrp(tctx, inst);
1349       break;
1350    case TGSI_OPCODE_FRC:
1351       if (!ctx->config->lower_FRC)
1352          goto skip;
1353       transform_frc(tctx, inst);
1354       break;
1355    case TGSI_OPCODE_POW:
1356       if (!ctx->config->lower_POW)
1357          goto skip;
1358       transform_pow(tctx, inst);
1359       break;
1360    case TGSI_OPCODE_LIT:
1361       if (!ctx->config->lower_LIT)
1362          goto skip;
1363       transform_lit(tctx, inst);
1364       break;
1365    case TGSI_OPCODE_EXP:
1366       if (!ctx->config->lower_EXP)
1367          goto skip;
1368       transform_exp(tctx, inst);
1369       break;
1370    case TGSI_OPCODE_LOG:
1371       if (!ctx->config->lower_LOG)
1372          goto skip;
1373       transform_log(tctx, inst);
1374       break;
1375    case TGSI_OPCODE_DP4:
1376       if (!ctx->config->lower_DP4)
1377          goto skip;
1378       transform_dotp(tctx, inst);
1379       break;
1380    case TGSI_OPCODE_DP3:
1381       if (!ctx->config->lower_DP3)
1382          goto skip;
1383       transform_dotp(tctx, inst);
1384       break;
1385    case TGSI_OPCODE_DP2:
1386       if (!ctx->config->lower_DP2)
1387          goto skip;
1388       transform_dotp(tctx, inst);
1389       break;
1390    case TGSI_OPCODE_FLR:
1391       if (!ctx->config->lower_FLR)
1392          goto skip;
1393       transform_flr_ceil(tctx, inst);
1394       break;
1395    case TGSI_OPCODE_CEIL:
1396       if (!ctx->config->lower_CEIL)
1397          goto skip;
1398       transform_flr_ceil(tctx, inst);
1399       break;
1400    case TGSI_OPCODE_TRUNC:
1401       if (!ctx->config->lower_TRUNC)
1402          goto skip;
1403       transform_trunc(tctx, inst);
1404       break;
1405    case TGSI_OPCODE_TEX:
1406    case TGSI_OPCODE_TXP:
1407    case TGSI_OPCODE_TXB:
1408    case TGSI_OPCODE_TXB2:
1409    case TGSI_OPCODE_TXL:
1410       if (transform_samp(tctx, inst))
1411          goto skip;
1412       break;
1413    default:
1414    skip:
1415       tctx->emit_instruction(tctx, inst);
1416       break;
1417    }
1418 }
1419
1420 /* returns NULL if no lowering required, else returns the new
1421  * tokens (which caller is required to free()).  In either case
1422  * returns the current info.
1423  */
1424 const struct tgsi_token *
1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426                         const struct tgsi_token *tokens,
1427                         struct tgsi_shader_info *info)
1428 {
1429    struct tgsi_lowering_context ctx;
1430    struct tgsi_token *newtoks;
1431    int newlen, numtmp;
1432
1433    /* sanity check in case limit is ever increased: */
1434    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435
1436    /* sanity check the lowering */
1437    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438    assert(!(config->lower_FRC && config->lower_TRUNC));
1439
1440    memset(&ctx, 0, sizeof(ctx));
1441    ctx.base.transform_instruction = transform_instr;
1442    ctx.info = info;
1443    ctx.config = config;
1444
1445    tgsi_scan_shader(tokens, info);
1446
1447    /* if we are adding fragment shader support to emulate two-sided
1448     * color, then figure out the number of additional inputs we need
1449     * to create for BCOLOR's..
1450     */
1451    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452        config->color_two_side) {
1453       int i;
1454       ctx.face_idx = -1;
1455       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457             ctx.two_side_idx[ctx.two_side_colors++] = i;
1458          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459             ctx.face_idx = i;
1460       }
1461    }
1462
1463    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466    /* if there are no instructions to lower, then we are done: */
1467    if (!(OPCS(DST) ||
1468          OPCS(LRP) ||
1469          OPCS(FRC) ||
1470          OPCS(POW) ||
1471          OPCS(LIT) ||
1472          OPCS(EXP) ||
1473          OPCS(LOG) ||
1474          OPCS(DP4) ||
1475          OPCS(DP3) ||
1476          OPCS(DP2) ||
1477          OPCS(FLR) ||
1478          OPCS(CEIL) ||
1479          OPCS(TRUNC) ||
1480          OPCS(TXP) ||
1481          ctx.two_side_colors ||
1482          ctx.saturate))
1483       return NULL;
1484
1485 #if 0  /* debug */
1486    _debug_printf("BEFORE:");
1487    tgsi_dump(tokens, 0);
1488 #endif
1489
1490    numtmp = 0;
1491    newlen = tgsi_num_tokens(tokens);
1492    if (OPCS(DST)) {
1493       newlen += DST_GROW * OPCS(DST);
1494       numtmp = MAX2(numtmp, DST_TMP);
1495    }
1496    if (OPCS(LRP)) {
1497       newlen += LRP_GROW * OPCS(LRP);
1498       numtmp = MAX2(numtmp, LRP_TMP);
1499    }
1500    if (OPCS(FRC)) {
1501       newlen += FRC_GROW * OPCS(FRC);
1502       numtmp = MAX2(numtmp, FRC_TMP);
1503    }
1504    if (OPCS(POW)) {
1505       newlen += POW_GROW * OPCS(POW);
1506       numtmp = MAX2(numtmp, POW_TMP);
1507    }
1508    if (OPCS(LIT)) {
1509       newlen += LIT_GROW * OPCS(LIT);
1510       numtmp = MAX2(numtmp, LIT_TMP);
1511    }
1512    if (OPCS(EXP)) {
1513       newlen += EXP_GROW * OPCS(EXP);
1514       numtmp = MAX2(numtmp, EXP_TMP);
1515    }
1516    if (OPCS(LOG)) {
1517       newlen += LOG_GROW * OPCS(LOG);
1518       numtmp = MAX2(numtmp, LOG_TMP);
1519    }
1520    if (OPCS(DP4)) {
1521       newlen += DP4_GROW * OPCS(DP4);
1522       numtmp = MAX2(numtmp, DOTP_TMP);
1523    }
1524    if (OPCS(DP3)) {
1525       newlen += DP3_GROW * OPCS(DP3);
1526       numtmp = MAX2(numtmp, DOTP_TMP);
1527    }
1528    if (OPCS(DP2)) {
1529       newlen += DP2_GROW * OPCS(DP2);
1530       numtmp = MAX2(numtmp, DOTP_TMP);
1531    }
1532    if (OPCS(FLR)) {
1533       newlen += FLR_GROW * OPCS(FLR);
1534       numtmp = MAX2(numtmp, FLR_TMP);
1535    }
1536    if (OPCS(CEIL)) {
1537       newlen += CEIL_GROW * OPCS(CEIL);
1538       numtmp = MAX2(numtmp, CEIL_TMP);
1539    }
1540    if (OPCS(TRUNC)) {
1541       newlen += TRUNC_GROW * OPCS(TRUNC);
1542       numtmp = MAX2(numtmp, TRUNC_TMP);
1543    }
1544    if (ctx.saturate || config->lower_TXP) {
1545       int n = 0;
1546
1547       if (ctx.saturate) {
1548          n = info->opcode_count[TGSI_OPCODE_TEX] +
1549             info->opcode_count[TGSI_OPCODE_TXP] +
1550             info->opcode_count[TGSI_OPCODE_TXB] +
1551             info->opcode_count[TGSI_OPCODE_TXB2] +
1552             info->opcode_count[TGSI_OPCODE_TXL];
1553       } else if (config->lower_TXP) {
1554           n = info->opcode_count[TGSI_OPCODE_TXP];
1555       }
1556
1557       newlen += SAMP_GROW * n;
1558       numtmp = MAX2(numtmp, SAMP_TMP);
1559    }
1560
1561    /* specifically don't include two_side_colors temps in the count: */
1562    ctx.numtmp = numtmp;
1563
1564    if (ctx.two_side_colors) {
1565       newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566       /* note: we permanently consume temp regs, re-writing references
1567        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568        * instruction that selects which varying to use):
1569        */
1570       numtmp += ctx.two_side_colors;
1571    }
1572
1573    newlen += 2 * numtmp;
1574    newlen += 5;        /* immediate */
1575
1576    newtoks = tgsi_alloc_tokens(newlen);
1577    if (!newtoks)
1578       return NULL;
1579
1580    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1581
1582    tgsi_scan_shader(newtoks, info);
1583
1584 #if 0  /* debug */
1585    _debug_printf("AFTER:");
1586    tgsi_dump(newtoks, 0);
1587 #endif
1588
1589    return newtoks;
1590 }