src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2011-2012 Advanced Micro Devices, Inc.
   4  * Copyright 2009 VMware, Inc.
   5  * Copyright 2007-2008 VMware, Inc.
   6  * All Rights Reserved.
   7  *
   8  * Permission is hereby granted, free of charge, to any person obtaining a
   9  * copy of this software and associated documentation files (the
  10  * "Software"), to deal in the Software without restriction, including
  11  * without limitation the rights to use, copy, modify, merge, publish,
  12  * distribute, sub license, and/or sell copies of the Software, and to
  13  * permit persons to whom the Software is furnished to do so, subject to
  14  * the following conditions:
  15  *
  16  * The above copyright notice and this permission notice (including the
  17  * next paragraph) shall be included in all copies or substantial portions
  18  * of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  23  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  24  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27  *
  28  **************************************************************************/
  29
  30 /**
  31  * @file
  32  * TGSI to LLVM IR translation.
  33  *
  34  * @author Jose Fonseca <jfonseca@vmware.com>
  35  * @author Tom Stellard <thomas.stellard@amd.com>
  36  *
  37  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
  38  * Brian Paul, and others.
  39  */
  40
  41
  42 #include "lp_bld_tgsi_action.h"
  43
  44 #include "lp_bld_tgsi.h"
  45 #include "lp_bld_arit.h"
  46 #include "lp_bld_bitarit.h"
  47 #include "lp_bld_const.h"
  48 #include "lp_bld_conv.h"
  49 #include "lp_bld_gather.h"
  50 #include "lp_bld_logic.h"
  51 #include "lp_bld_pack.h"
  52
  53 #include "tgsi/tgsi_exec.h"
  54
/* XXX: The CPU-only defaults should be replaced by generic ones.  In most
 * cases, the CPU defaults are just wrappers around a function in
 * lp_bld_arit.c; these functions should be inlined here, and the CPU-specific
 * code should be removed and placed elsewhere.
 */
  60
  61 /* Default actions */
  62
  63 /* Generic fetch_arg functions */
  64
  65 static void scalar_unary_fetch_args(
  66    struct lp_build_tgsi_context * bld_base,
  67    struct lp_build_emit_data * emit_data)
  68 {
  69    /* src0.x */
  70    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
  71    emit_data->arg_count = 1;
  72    emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
  73 }
  74
  75 static void scalar_binary_fetch_args(
  76    struct lp_build_tgsi_context * bld_base,
  77    struct lp_build_emit_data * emit_data)
  78 {
  79    /* src0.x */
  80    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
  81                                             0, TGSI_CHAN_X);
  82    /* src1.x */
  83    emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
  84                                             1, TGSI_CHAN_X);
  85    emit_data->arg_count = 2;
  86    emit_data->dst_type = LLVMTypeOf(emit_data->args[0]);
  87 }
  88
  89 /* TGSI_OPCODE_ADD */
  90 static void
  91 add_emit(
  92    const struct lp_build_tgsi_action * action,
  93    struct lp_build_tgsi_context * bld_base,
  94    struct lp_build_emit_data * emit_data)
  95 {
  96    emit_data->output[emit_data->chan] = LLVMBuildFAdd(
  97                                 bld_base->base.gallivm->builder,
  98                                 emit_data->args[0], emit_data->args[1], "");
  99 }
 100
 101 /* TGSI_OPCODE_ARR */
 102 static void
 103 arr_emit(
 104    const struct lp_build_tgsi_action * action,
 105    struct lp_build_tgsi_context * bld_base,
 106    struct lp_build_emit_data * emit_data)
 107 {
 108    LLVMValueRef tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ROUND, emit_data->args[0]);
 109    emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
 110                                                         bld_base->uint_bld.vec_type, "");
 111 }
 112
 113 /* TGSI_OPCODE_CLAMP */
 114 static void
 115 clamp_emit(
 116    const struct lp_build_tgsi_action * action,
 117    struct lp_build_tgsi_context * bld_base,
 118    struct lp_build_emit_data * emit_data)
 119 {
 120    LLVMValueRef tmp;
 121    tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
 122                                    emit_data->args[0],
 123                                    emit_data->args[1]);
 124    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 125                                        TGSI_OPCODE_MIN, tmp, emit_data->args[2]);
 126 }
 127
 128 /* DP* Helper */
 129
 130 static void
 131 dp_fetch_args(
 132    struct lp_build_tgsi_context * bld_base,
 133    struct lp_build_emit_data * emit_data,
 134    unsigned dp_components)
 135 {
 136    unsigned chan, src;
 137    for (src = 0; src < 2; src++) {
 138       for (chan = 0; chan < dp_components; chan++) {
 139          emit_data->args[(src * dp_components) + chan] =
 140                      lp_build_emit_fetch(bld_base, emit_data->inst, src, chan);
 141       }
 142    }
 143    emit_data->dst_type = bld_base->base.elem_type;
 144 }
 145
 146 /* TGSI_OPCODE_DP2 */
 147 static void
 148 dp2_fetch_args(
 149    struct lp_build_tgsi_context * bld_base,
 150    struct lp_build_emit_data * emit_data)
 151 {
 152    dp_fetch_args(bld_base, emit_data, 2);
 153 }
 154
 155 static void
 156 dp2_emit(
 157    const struct lp_build_tgsi_action * action,
 158    struct lp_build_tgsi_context * bld_base,
 159    struct lp_build_emit_data * emit_data)
 160 {
 161    LLVMValueRef tmp0, tmp1;
 162    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 163                                     emit_data->args[0] /* src0.x */,
 164                                     emit_data->args[2] /* src1.x */);
 165    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 166                                     emit_data->args[1] /* src0.y */,
 167                                     emit_data->args[3] /* src1.y */);
 168    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 169                                                     TGSI_OPCODE_ADD, tmp0, tmp1);
 170 }
 171
/* Fetch/emit pair for the two-component dot product: operands are gathered
 * by dp2_fetch_args and the result is built by dp2_emit.
 */
static struct lp_build_tgsi_action dp2_action = {
   dp2_fetch_args,       /* fetch_args */
   dp2_emit      /* emit */
};
 176
 177 /* TGSI_OPCODE_DP2A */
 178 static void
 179 dp2a_fetch_args(
 180    struct lp_build_tgsi_context * bld_base,
 181    struct lp_build_emit_data * emit_data)
 182 {
 183    dp_fetch_args(bld_base, emit_data, 2);
 184    emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst,
 185                                             2, TGSI_CHAN_X);
 186 }
 187
 188 static void
 189 dp2a_emit(
 190    const struct lp_build_tgsi_action * action,
 191    struct lp_build_tgsi_context * bld_base,
 192    struct lp_build_emit_data * emit_data)
 193 {
 194    LLVMValueRef tmp;
 195    tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data);
 196    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD,
 197                                     emit_data->args[5], tmp);
 198 }
 199
/* Fetch/emit pair for DP2A: operands are gathered by dp2a_fetch_args and
 * the result is built by dp2a_emit.
 */
static struct lp_build_tgsi_action dp2a_action = {
   dp2a_fetch_args,      /* fetch_args */
   dp2a_emit     /* emit */
};
 204
 205 /* TGSI_OPCODE_DP3 */
 206 static void
 207 dp3_fetch_args(
 208    struct lp_build_tgsi_context * bld_base,
 209    struct lp_build_emit_data * emit_data)
 210 {
 211    dp_fetch_args(bld_base, emit_data, 3);
 212 }
 213
 214 static void
 215 dp3_emit(
 216    const struct lp_build_tgsi_action * action,
 217    struct lp_build_tgsi_context * bld_base,
 218    struct lp_build_emit_data * emit_data)
 219 {
 220    LLVMValueRef tmp0, tmp1;
 221    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 222                                     emit_data->args[0] /* src0.x */,
 223                                     emit_data->args[3] /* src1.x */);
 224    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 225                                     emit_data->args[1] /* src0.y */,
 226                                     emit_data->args[4] /* src1.y */);
 227    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0);
 228    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 229                                     emit_data->args[2] /* src0.z */,
 230                                     emit_data->args[5] /* src1.z */);
 231    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 232                                                     TGSI_OPCODE_ADD, tmp0, tmp1);
 233 }
 234
/* Fetch/emit pair for the three-component dot product: operands are
 * gathered by dp3_fetch_args and the result is built by dp3_emit.
 */
static struct lp_build_tgsi_action dp3_action = {
   dp3_fetch_args,       /* fetch_args */
   dp3_emit      /* emit */
};
 239
/* TGSI_OPCODE_DP4 */
 241
 242 static void
 243 dp4_fetch_args(
 244    struct lp_build_tgsi_context * bld_base,
 245    struct lp_build_emit_data * emit_data)
 246 {
 247    dp_fetch_args(bld_base, emit_data, 4);
 248 }
 249
 250 static void
 251 dp4_emit(
 252    const struct lp_build_tgsi_action * action,
 253    struct lp_build_tgsi_context * bld_base,
 254    struct lp_build_emit_data * emit_data)
 255 {
 256    LLVMValueRef tmp0, tmp1;
 257    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 258                                     emit_data->args[0] /* src0.x */,
 259                                     emit_data->args[4] /* src1.x */);
 260    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 261                                     emit_data->args[1] /* src0.y */,
 262                                     emit_data->args[5] /* src1.y */);
 263    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
 264    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 265                                     emit_data->args[2] /* src0.z */,
 266                                     emit_data->args[6] /* src1.z */);
 267    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1);
 268    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 269                                     emit_data->args[3] /* src0.w */,
 270                                     emit_data->args[7] /* src1.w */);
 271    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 272                                                     TGSI_OPCODE_ADD, tmp0, tmp1);
 273 }
 274
/* Fetch/emit pair for the four-component dot product: operands are
 * gathered by dp4_fetch_args and the result is built by dp4_emit.
 */
static struct lp_build_tgsi_action dp4_action = {
   dp4_fetch_args,       /* fetch_args */
   dp4_emit      /* emit */
};
 279
 280 /* TGSI_OPCODE_DPH */
 281 static void
 282 dph_fetch_args(
 283    struct lp_build_tgsi_context * bld_base,
 284    struct lp_build_emit_data * emit_data)
 285 {
 286    dp_fetch_args(bld_base, emit_data, 4);
 287    /* src0.w */
 288    emit_data->args[3] = bld_base->base.one;
 289 }
 290
/* Fetch/emit pair for DPH: dph_fetch_args forces the src0.w slot to one, so
 * the four-component dot-product emitter (dp4_emit) is reused unchanged.
 * Declared without 'static', so the symbol has external linkage.
 */
const struct lp_build_tgsi_action dph_action = {
   dph_fetch_args,       /* fetch_args */
   dp4_emit      /* emit */
};
 295
 296 /* TGSI_OPCODE_DST */
 297 static void
 298 dst_fetch_args(
 299    struct lp_build_tgsi_context * bld_base,
 300    struct lp_build_emit_data * emit_data)
 301 {
 302    /* src0.y */
 303    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
 304                                             0, TGSI_CHAN_Y);
 305    /* src0.z */
 306    emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
 307                                             0, TGSI_CHAN_Z);
 308    /* src1.y */
 309    emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
 310                                             1, TGSI_CHAN_Y);
 311    /* src1.w */
 312    emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
 313                                             1, TGSI_CHAN_W);
 314 }
 315
 316 static void
 317 dst_emit(
 318    const struct lp_build_tgsi_action * action,
 319    struct lp_build_tgsi_context * bld_base,
 320    struct lp_build_emit_data * emit_data)
 321 {
 322    /* dst.x */
 323    emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
 324
 325    /* dst.y */
 326    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
 327                                           TGSI_OPCODE_MUL,
 328                                           emit_data->args[0] /* src0.y */,
 329                                           emit_data->args[2] /* src1.y */);
 330    /* dst.z */
 331    emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */
 332
 333    /* dst.w */
 334    emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */
 335 }
 336
/* Fetch/emit pair for DST: operands are gathered by dst_fetch_args and all
 * four output channels are written by dst_emit.
 */
static struct lp_build_tgsi_action dst_action = {
   dst_fetch_args,       /* fetch_args */
   dst_emit      /* emit */
};
 341
 342 /* TGSI_OPCODE_END */
 343 static void
 344 end_emit(
 345    const struct lp_build_tgsi_action * action,
 346    struct lp_build_tgsi_context * bld_base,
 347    struct lp_build_emit_data * emit_data)
 348 {
 349    bld_base->pc = -1;
 350 }
 351
 352 /* TGSI_OPCODE_EXP */
 353
 354 static void
 355 exp_emit(
 356    const struct lp_build_tgsi_action * action,
 357    struct lp_build_tgsi_context * bld_base,
 358    struct lp_build_emit_data * emit_data)
 359 {
 360    LLVMValueRef floor_x;
 361
 362    /* floor( src0.x ) */
 363    floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
 364                                       emit_data->args[0]);
 365
 366    /* 2 ^ floor( src0.x ) */
 367    emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
 368                                        TGSI_OPCODE_EX2, floor_x);
 369
 370    /* src0.x - floor( src0.x ) */
 371    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
 372                    TGSI_OPCODE_SUB,  emit_data->args[0] /* src0.x */, floor_x);
 373
 374    /* 2 ^ src0.x */
 375    emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base,
 376                              TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */);
 377
 378    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
 379 }
 380
/* Fetch/emit pair for EXP: src0.x is fetched by scalar_unary_fetch_args and
 * all four output channels are written by exp_emit.
 * Declared without 'static', so the symbol has external linkage.
 */
const struct lp_build_tgsi_action exp_action = {
   scalar_unary_fetch_args,      /* fetch_args */
   exp_emit      /* emit */
};
 385
 386 /* TGSI_OPCODE_FRC */
 387
 388 static void
 389 frc_emit(
 390    const struct lp_build_tgsi_action * action,
 391    struct lp_build_tgsi_context * bld_base,
 392    struct lp_build_emit_data * emit_data)
 393 {
 394    LLVMValueRef tmp;
 395    tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
 396                                   emit_data->args[0]);
 397    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 398                                        TGSI_OPCODE_SUB, emit_data->args[0], tmp);
 399 }
 400
 401 /* TGSI_OPCODE_KILL_IF */
 402
 403 static void
 404 kil_fetch_args(
 405    struct lp_build_tgsi_context * bld_base,
 406    struct lp_build_emit_data * emit_data)
 407 {
 408    /* src0.x */
 409    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
 410                                             0, TGSI_CHAN_X);
 411    /* src0.y */
 412    emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
 413                                             0, TGSI_CHAN_Y);
 414    /* src0.z */
 415    emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst,
 416                                             0, TGSI_CHAN_Z);
 417    /* src0.w */
 418    emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst,
 419                                             0, TGSI_CHAN_W);
 420    emit_data->arg_count = 4;
 421    emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
 422 }
 423
 424 /* TGSI_OPCODE_KILL */
 425
 426 static void
 427 kilp_fetch_args(
 428    struct lp_build_tgsi_context * bld_base,
 429    struct lp_build_emit_data * emit_data)
 430 {
 431    emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
 432 }
 433
 434 /* TGSI_OPCODE_LIT */
 435
 436 static void
 437 lit_fetch_args(
 438    struct lp_build_tgsi_context * bld_base,
 439    struct lp_build_emit_data * emit_data)
 440 {
 441    /* src0.x */
 442    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
 443    /* src0.y */
 444    emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
 445    /* src0.w */
 446    emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
 447    emit_data->arg_count = 3;
 448 }
 449
 450 static void
 451 lit_emit(
 452    const struct lp_build_tgsi_action * action,
 453    struct lp_build_tgsi_context * bld_base,
 454    struct lp_build_emit_data * emit_data)
 455 {
 456    LLVMValueRef tmp0, tmp1, tmp2;
 457
 458    /* dst.x */
 459    emit_data->output[TGSI_CHAN_X] = bld_base->base.one;
 460
 461    /* dst. y */
 462    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
 463                                                TGSI_OPCODE_MAX,
 464                                                emit_data->args[0] /* src0.x */,
 465                                                bld_base->base.zero);
 466
 467    /* dst.z */
 468    /* XMM[1] = SrcReg[0].yyyy */
 469    tmp1 = emit_data->args[1];
 470    /* XMM[1] = max(XMM[1], 0) */
 471    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
 472                                     tmp1, bld_base->base.zero);
 473    /* XMM[2] = SrcReg[0].wwww */
 474    tmp2 = emit_data->args[2];
 475    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW,
 476                                     tmp1, tmp2);
 477    tmp0 = emit_data->args[0];
 478    emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base,
 479                                              TGSI_OPCODE_CMP,
 480                                              tmp0, bld_base->base.zero, tmp1);
 481    /* dst.w */
 482    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
 483 }
 484
/* Fetch/emit pair for LIT: operands are gathered by lit_fetch_args and all
 * four output channels are written by lit_emit.
 */
static struct lp_build_tgsi_action lit_action = {
   lit_fetch_args,       /* fetch_args */
   lit_emit      /* emit */
};
 489
 490 /* TGSI_OPCODE_LOG */
 491
 492 static void
 493 log_emit(
 494    const struct lp_build_tgsi_action * action,
 495    struct lp_build_tgsi_context * bld_base,
 496    struct lp_build_emit_data * emit_data)
 497 {
 498
 499    LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x;
 500
 501    /* abs( src0.x) */
 502    abs_x = lp_build_abs(&bld_base->base, emit_data->args[0] /* src0.x */);
 503
 504    /* log( abs( src0.x ) ) */
 505    log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2,
 506                                         abs_x);
 507
 508    /* floor( log( abs( src0.x ) ) ) */
 509    flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR,
 510                                             log_abs_x);
 511    /* dst.x */
 512    emit_data->output[TGSI_CHAN_X] = flr_log_abs_x;
 513
 514    /* dst.y */
 515    ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2,
 516                                                 flr_log_abs_x);
 517
 518    /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */
 519    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
 520                                     TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x);
 521
 522    /* dst.x */
 523    emit_data->output[TGSI_CHAN_Z] = log_abs_x;
 524
 525    /* dst.w */
 526    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
 527 }
 528
/* Fetch/emit pair for LOG: src0.x is fetched by scalar_unary_fetch_args and
 * all four output channels are written by log_emit.
 */
static struct lp_build_tgsi_action log_action = {
   scalar_unary_fetch_args,      /* fetch_args */
   log_emit      /* emit */
};
 533
 534 /* TGSI_OPCODE_PK2H */
 535
 536 static void
 537 pk2h_fetch_args(
 538    struct lp_build_tgsi_context * bld_base,
 539    struct lp_build_emit_data * emit_data)
 540 {
 541    /* src0.x */
 542    emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
 543                                             0, TGSI_CHAN_X);
 544    /* src0.y */
 545    emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
 546                                             0, TGSI_CHAN_Y);
 547 }
 548
 549 static void
 550 pk2h_emit(
 551    const struct lp_build_tgsi_action *action,
 552    struct lp_build_tgsi_context *bld_base,
 553    struct lp_build_emit_data *emit_data)
 554 {
 555    struct gallivm_state *gallivm = bld_base->base.gallivm;
 556    struct lp_type f16i_t;
 557    LLVMValueRef lo, hi, res;
 558
 559    f16i_t = lp_type_uint_vec(16, bld_base->base.type.length * 32);
 560    lo = lp_build_float_to_half(gallivm, emit_data->args[0]);
 561    hi = lp_build_float_to_half(gallivm, emit_data->args[1]);
 562    /* maybe some interleave doubling vector width would be useful... */
 563    lo = lp_build_pad_vector(gallivm, lo, bld_base->base.type.length * 2);
 564    hi = lp_build_pad_vector(gallivm, hi, bld_base->base.type.length * 2);
 565    res = lp_build_interleave2(gallivm, f16i_t, lo, hi, 0);
 566
 567    emit_data->output[emit_data->chan] = res;
 568 }
 569
/* Fetch/emit pair for PK2H: src0.x and src0.y are fetched by
 * pk2h_fetch_args and packed by pk2h_emit.
 */
static struct lp_build_tgsi_action pk2h_action = {
   pk2h_fetch_args, /* fetch_args */
   pk2h_emit        /* emit */
};
 574
 575 /* TGSI_OPCODE_UP2H */
 576
 577 static void
 578 up2h_emit(
 579    const struct lp_build_tgsi_action *action,
 580    struct lp_build_tgsi_context *bld_base,
 581    struct lp_build_emit_data *emit_data)
 582 {
 583    struct gallivm_state *gallivm = bld_base->base.gallivm;
 584    LLVMBuilderRef builder = gallivm->builder;
 585    LLVMContextRef context = gallivm->context;
 586    LLVMValueRef lo, hi, res[2], arg;
 587    unsigned nr = bld_base->base.type.length;
 588    LLVMTypeRef i16t = LLVMVectorType(LLVMInt16TypeInContext(context), nr * 2);
 589
 590    arg = LLVMBuildBitCast(builder, emit_data->args[0], i16t, "");
 591    lo = lp_build_uninterleave1(gallivm, nr * 2, arg, 0);
 592    hi = lp_build_uninterleave1(gallivm, nr * 2, arg, 1);
 593    res[0] = lp_build_half_to_float(gallivm, lo);
 594    res[1] = lp_build_half_to_float(gallivm, hi);
 595
 596    emit_data->output[0] = emit_data->output[2] = res[0];
 597    emit_data->output[1] = emit_data->output[3] = res[1];
 598 }
 599
/* Fetch/emit pair for UP2H: src0.x is fetched by scalar_unary_fetch_args
 * and unpacked into all four output channels by up2h_emit.
 */
static struct lp_build_tgsi_action up2h_action = {
   scalar_unary_fetch_args, /* fetch_args */
   up2h_emit                /* emit */
};
 604
 605 /* TGSI_OPCODE_LRP */
 606
 607 static void
 608 lrp_emit(
 609    const struct lp_build_tgsi_action * action,
 610    struct lp_build_tgsi_context * bld_base,
 611    struct lp_build_emit_data * emit_data)
 612 {
 613    struct lp_build_context *bld = &bld_base->base;
 614    LLVMValueRef inv, a, b;
 615
 616    /* This uses the correct version: (1 - t)*a + t*b
 617     *
 618     * An alternative version is "a + t*(b-a)". The problem is this version
 619     * doesn't return "b" for t = 1, because "a + (b-a)" isn't equal to "b"
 620     * because of the floating-point rounding.
 621     */
 622    inv = lp_build_sub(bld, bld_base->base.one, emit_data->args[0]);
 623    a = lp_build_mul(bld, emit_data->args[1], emit_data->args[0]);
 624    b = lp_build_mul(bld, emit_data->args[2], inv);
 625    emit_data->output[emit_data->chan] = lp_build_add(bld, a, b);
 626 }
 627
 628 /* TGSI_OPCODE_MAD */
 629
 630 static void
 631 mad_emit(
 632    const struct lp_build_tgsi_action * action,
 633    struct lp_build_tgsi_context * bld_base,
 634    struct lp_build_emit_data * emit_data)
 635 {
 636    LLVMValueRef tmp;
 637    tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL,
 638                                    emit_data->args[0],
 639                                    emit_data->args[1]);
 640    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 641                                        TGSI_OPCODE_ADD, tmp, emit_data->args[2]);
 642 }
 643
 644 /* TGSI_OPCODE_MOV */
 645
 646 static void
 647 mov_emit(
 648    const struct lp_build_tgsi_action * action,
 649    struct lp_build_tgsi_context * bld_base,
 650    struct lp_build_emit_data * emit_data)
 651 {
 652    emit_data->output[emit_data->chan] = emit_data->args[0];
 653 }
 654
 655 /* TGSI_OPCODE_MUL */
 656 static void
 657 mul_emit(
 658    const struct lp_build_tgsi_action * action,
 659    struct lp_build_tgsi_context * bld_base,
 660    struct lp_build_emit_data * emit_data)
 661 {
 662    emit_data->output[emit_data->chan] = LLVMBuildFMul(
 663                                    bld_base->base.gallivm->builder,
 664                                    emit_data->args[0], emit_data->args[1], "");
 665 }
 666
/* TGSI_OPCODE_DIV */
 668 static void fdiv_emit(
 669    const struct lp_build_tgsi_action * action,
 670    struct lp_build_tgsi_context * bld_base,
 671    struct lp_build_emit_data * emit_data)
 672 {
 673    emit_data->output[emit_data->chan] = LLVMBuildFDiv(
 674                                    bld_base->base.gallivm->builder,
 675                                    emit_data->args[0], emit_data->args[1], "");
 676 }
 677
/* TGSI_OPCODE_RCP */
 679 static void rcp_emit(
 680    const struct lp_build_tgsi_action * action,
 681    struct lp_build_tgsi_context * bld_base,
 682    struct lp_build_emit_data * emit_data)
 683 {
 684    LLVMValueRef one;
 685    one = lp_build_const_float(bld_base->base.gallivm, 1.0f);
 686    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 687                                    TGSI_OPCODE_DIV, one, emit_data->args[0]);
 688 }
 689
 690 /* TGSI_OPCODE_POW */
 691
 692 static void
 693 pow_emit(
 694    const struct lp_build_tgsi_action * action,
 695    struct lp_build_tgsi_context * bld_base,
 696    struct lp_build_emit_data * emit_data)
 697 {
 698    emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
 699                                    emit_data->args[0], emit_data->args[1]);
 700 }
 701
/* Fetch/emit pair for POW: src0.x and src1.x are fetched by
 * scalar_binary_fetch_args and the result is built by pow_emit.
 */
static struct lp_build_tgsi_action pow_action = {
   scalar_binary_fetch_args,     /* fetch_args */
   pow_emit      /* emit */
};
 706
 707 /* TGSI_OPCODE_RSQ */
 708
 709 static void
 710 rsq_emit(
 711    const struct lp_build_tgsi_action * action,
 712    struct lp_build_tgsi_context * bld_base,
 713    struct lp_build_emit_data * emit_data)
 714 {
 715    if (bld_base->rsq_action.emit) {
 716       bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
 717    } else {
 718       emit_data->output[emit_data->chan] = bld_base->base.undef;
 719    }
 720 }
 721
/* Fetch/emit pair for RSQ: src0.x is fetched by scalar_unary_fetch_args and
 * rsq_emit forwards to the context's rsq_action hook.
 * Declared without 'static', so the symbol has external linkage.
 */
const struct lp_build_tgsi_action rsq_action = {
   scalar_unary_fetch_args,      /* fetch_args */
   rsq_emit      /* emit */

};
 727
 728 /* TGSI_OPCODE_SQRT */
 729
 730 static void
 731 sqrt_emit(
 732    const struct lp_build_tgsi_action * action,
 733    struct lp_build_tgsi_context * bld_base,
 734    struct lp_build_emit_data * emit_data)
 735 {
 736    if (bld_base->sqrt_action.emit) {
 737       bld_base->sqrt_action.emit(&bld_base->sqrt_action, bld_base, emit_data);
 738    } else {
 739       emit_data->output[emit_data->chan] = bld_base->base.undef;
 740    }
 741 }
 742
/* Fetch/emit pair for SQRT: src0.x is fetched by scalar_unary_fetch_args
 * and sqrt_emit forwards to the context's sqrt_action hook.
 * Declared without 'static', so the symbol has external linkage.
 */
const struct lp_build_tgsi_action sqrt_action = {
   scalar_unary_fetch_args,      /* fetch_args */
   sqrt_emit     /* emit */
};
 747
 748 /* TGSI_OPCODE_SCS */
 749 static void
 750 scs_emit(
 751    const struct lp_build_tgsi_action * action,
 752    struct lp_build_tgsi_context * bld_base,
 753    struct lp_build_emit_data * emit_data)
 754 {
 755    /* dst.x */
 756    emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
 757                                            TGSI_OPCODE_COS, emit_data->args[0]);
 758    /* dst.y */
 759    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
 760                                            TGSI_OPCODE_SIN, emit_data->args[0]);
 761    /* dst.z */
 762    emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
 763
 764    /* dst.w */
 765    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
 766 }
 767
/* Fetch/emit pair for SCS: src0.x is fetched by scalar_unary_fetch_args and
 * all four output channels are written by scs_emit.
 * Declared without 'static', so the symbol has external linkage.
 */
const struct lp_build_tgsi_action scs_action = {
   scalar_unary_fetch_args,      /* fetch_args */
   scs_emit      /* emit */
};
 772
 773 /* TGSI_OPCODE_SUB */
 774 static void
 775 sub_emit(
 776    const struct lp_build_tgsi_action * action,
 777    struct lp_build_tgsi_context * bld_base,
 778    struct lp_build_emit_data * emit_data)
 779 {
 780    emit_data->output[emit_data->chan] =
 781       LLVMBuildFSub(bld_base->base.gallivm->builder,
 782                     emit_data->args[0],
 783                     emit_data->args[1], "");
 784 }
 785
 786 /* TGSI_OPCODE_F2U */
 787 static void
 788 f2u_emit(
 789    const struct lp_build_tgsi_action * action,
 790    struct lp_build_tgsi_context * bld_base,
 791    struct lp_build_emit_data * emit_data)
 792 {
 793    emit_data->output[emit_data->chan] =
 794       LLVMBuildFPToUI(bld_base->base.gallivm->builder,
 795                       emit_data->args[0],
 796                       bld_base->base.int_vec_type, "");
 797 }
 798
 799 /* TGSI_OPCODE_U2F */
 800 static void
 801 u2f_emit(
 802    const struct lp_build_tgsi_action * action,
 803    struct lp_build_tgsi_context * bld_base,
 804    struct lp_build_emit_data * emit_data)
 805 {
 806    emit_data->output[emit_data->chan] =
 807       LLVMBuildUIToFP(bld_base->base.gallivm->builder,
 808                       emit_data->args[0],
 809                       bld_base->base.vec_type, "");
 810 }
 811
 812 static void
 813 umad_emit(
 814    const struct lp_build_tgsi_action * action,
 815    struct lp_build_tgsi_context * bld_base,
 816    struct lp_build_emit_data * emit_data)
 817 {
 818    LLVMValueRef tmp;
 819    tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMUL,
 820                                    emit_data->args[0],
 821                                    emit_data->args[1]);
 822    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
 823                                        TGSI_OPCODE_UADD, tmp, emit_data->args[2]);
 824 }
 825
 826 /* TGSI_OPCODE_UMUL */
 827 static void
 828 umul_emit(
 829    const struct lp_build_tgsi_action * action,
 830    struct lp_build_tgsi_context * bld_base,
 831    struct lp_build_emit_data * emit_data)
 832 {
 833    emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint_bld,
 834                                    emit_data->args[0], emit_data->args[1]);
 835 }
 836
 837 /* TGSI_OPCODE_IMUL_HI */
 838 static void
 839 imul_hi_emit(
 840    const struct lp_build_tgsi_action * action,
 841    struct lp_build_tgsi_context * bld_base,
 842    struct lp_build_emit_data * emit_data)
 843 {
 844    struct lp_build_context *int_bld = &bld_base->int_bld;
 845    LLVMValueRef hi_bits;
 846
 847    assert(int_bld->type.width == 32);
 848
 849    /* low result bits are tossed away */
 850    lp_build_mul_32_lohi(int_bld, emit_data->args[0],
 851                         emit_data->args[1], &hi_bits);
 852    emit_data->output[emit_data->chan] = hi_bits;
 853 }
 854
 855 static void
 856 imul_hi_emit_cpu(
 857    const struct lp_build_tgsi_action * action,
 858    struct lp_build_tgsi_context * bld_base,
 859    struct lp_build_emit_data * emit_data)
 860 {
 861    struct lp_build_context *int_bld = &bld_base->int_bld;
 862    LLVMValueRef hi_bits;
 863
 864    assert(int_bld->type.width == 32);
 865
 866    /* low result bits are tossed away */
 867    lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
 868                             emit_data->args[1], &hi_bits);
 869    emit_data->output[emit_data->chan] = hi_bits;
 870 }
 871
 872 /* TGSI_OPCODE_UMUL_HI */
 873 static void
 874 umul_hi_emit(
 875    const struct lp_build_tgsi_action * action,
 876    struct lp_build_tgsi_context * bld_base,
 877    struct lp_build_emit_data * emit_data)
 878 {
 879    struct lp_build_context *uint_bld = &bld_base->uint_bld;
 880    LLVMValueRef hi_bits;
 881
 882    assert(uint_bld->type.width == 32);
 883
 884    /* low result bits are tossed away */
 885    lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
 886                         emit_data->args[1], &hi_bits);
 887    emit_data->output[emit_data->chan] = hi_bits;
 888 }
 889
 890 static void
 891 umul_hi_emit_cpu(
 892    const struct lp_build_tgsi_action * action,
 893    struct lp_build_tgsi_context * bld_base,
 894    struct lp_build_emit_data * emit_data)
 895 {
 896    struct lp_build_context *uint_bld = &bld_base->uint_bld;
 897    LLVMValueRef hi_bits;
 898
 899    assert(uint_bld->type.width == 32);
 900
 901    /* low result bits are tossed away */
 902    lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
 903                             emit_data->args[1], &hi_bits);
 904    emit_data->output[emit_data->chan] = hi_bits;
 905 }
 906
 907 /* TGSI_OPCODE_MAX */
 908 static void fmax_emit(
 909    const struct lp_build_tgsi_action * action,
 910    struct lp_build_tgsi_context * bld_base,
 911    struct lp_build_emit_data * emit_data)
 912 {
 913    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 914    emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
 915                                    LLVMBuildFCmp(builder, LLVMRealUGE,
 916                                    emit_data->args[0], emit_data->args[1], ""),
 917                                    emit_data->args[0], emit_data->args[1], "");
 918 }
 919
 920 /* TGSI_OPCODE_MIN */
 921 static void fmin_emit(
 922    const struct lp_build_tgsi_action * action,
 923    struct lp_build_tgsi_context * bld_base,
 924    struct lp_build_emit_data * emit_data)
 925 {
 926    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 927    emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
 928                                    LLVMBuildFCmp(builder, LLVMRealUGE,
 929                                    emit_data->args[0], emit_data->args[1], ""),
 930                                    emit_data->args[1], emit_data->args[0], "");
 931 }
 932
 933 /* TGSI_OPCODE_XPD */
 934
 935 static void
 936 xpd_fetch_args(
 937    struct lp_build_tgsi_context * bld_base,
 938    struct lp_build_emit_data * emit_data)
 939 {
 940    dp_fetch_args(bld_base, emit_data, 3);
 941 }
 942
 943 /**
 944  * (a * b) - (c * d)
 945  */
 946 static LLVMValueRef
 947 xpd_helper(
 948   struct lp_build_tgsi_context * bld_base,
 949   LLVMValueRef a,
 950   LLVMValueRef b,
 951   LLVMValueRef c,
 952   LLVMValueRef d)
 953 {
 954    LLVMValueRef tmp0, tmp1;
 955
 956    tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b);
 957    tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d);
 958
 959    return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1);
 960 }
 961
 962 static void
 963 xpd_emit(
 964    const struct lp_build_tgsi_action * action,
 965    struct lp_build_tgsi_context * bld_base,
 966    struct lp_build_emit_data * emit_data)
 967 {
 968    emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base,
 969               emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */,
 970               emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */);
 971
 972    emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base,
 973               emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */,
 974               emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */);
 975
 976    emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base,
 977               emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */,
 978               emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */);
 979
 980    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
 981 }
 982
 983 const struct lp_build_tgsi_action xpd_action = {
 984    xpd_fetch_args,       /* fetch_args */
 985    xpd_emit      /* emit */
 986 };
 987
 988 /* TGSI_OPCODE_D2F */
 989 static void
 990 d2f_emit(
 991    const struct lp_build_tgsi_action * action,
 992    struct lp_build_tgsi_context * bld_base,
 993    struct lp_build_emit_data * emit_data)
 994 {
 995    emit_data->output[emit_data->chan] =
 996       LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
 997                       emit_data->args[0],
 998                        bld_base->base.vec_type, "");
 999 }
1000
1001 /* TGSI_OPCODE_D2I */
1002 static void
1003 d2i_emit(
1004    const struct lp_build_tgsi_action * action,
1005    struct lp_build_tgsi_context * bld_base,
1006    struct lp_build_emit_data * emit_data)
1007 {
1008    emit_data->output[emit_data->chan] =
1009       LLVMBuildFPToSI(bld_base->base.gallivm->builder,
1010                       emit_data->args[0],
1011                       bld_base->base.int_vec_type, "");
1012 }
1013
1014 /* TGSI_OPCODE_D2U */
1015 static void
1016 d2u_emit(
1017    const struct lp_build_tgsi_action * action,
1018    struct lp_build_tgsi_context * bld_base,
1019    struct lp_build_emit_data * emit_data)
1020 {
1021    emit_data->output[emit_data->chan] =
1022       LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1023                       emit_data->args[0],
1024                       bld_base->base.int_vec_type, "");
1025 }
1026
1027 /* TGSI_OPCODE_F2D */
1028 static void
1029 f2d_emit(
1030    const struct lp_build_tgsi_action * action,
1031    struct lp_build_tgsi_context * bld_base,
1032    struct lp_build_emit_data * emit_data)
1033 {
1034    emit_data->output[emit_data->chan] =
1035       LLVMBuildFPExt(bld_base->base.gallivm->builder,
1036                       emit_data->args[0],
1037                       bld_base->dbl_bld.vec_type, "");
1038 }
1039
1040 /* TGSI_OPCODE_U2D */
1041 static void
1042 u2d_emit(
1043    const struct lp_build_tgsi_action * action,
1044    struct lp_build_tgsi_context * bld_base,
1045    struct lp_build_emit_data * emit_data)
1046 {
1047    emit_data->output[emit_data->chan] =
1048       LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1049                       emit_data->args[0],
1050                       bld_base->dbl_bld.vec_type, "");
1051 }
1052
1053 /* TGSI_OPCODE_I2D */
1054 static void
1055 i2d_emit(
1056    const struct lp_build_tgsi_action * action,
1057    struct lp_build_tgsi_context * bld_base,
1058    struct lp_build_emit_data * emit_data)
1059 {
1060    emit_data->output[emit_data->chan] =
1061       LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1062                       emit_data->args[0],
1063                       bld_base->dbl_bld.vec_type, "");
1064 }
1065
1066 /* TGSI_OPCODE_DMAD */
1067 static void
1068 dmad_emit(
1069    const struct lp_build_tgsi_action * action,
1070    struct lp_build_tgsi_context * bld_base,
1071    struct lp_build_emit_data * emit_data)
1072 {
1073    LLVMValueRef tmp;
1074    tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
1075                                    emit_data->args[0],
1076                                    emit_data->args[1]);
1077    emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
1078                                        TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
1079 }
1080
1081 /*.TGSI_OPCODE_DRCP.*/
1082 static void drcp_emit(
1083    const struct lp_build_tgsi_action * action,
1084    struct lp_build_tgsi_context * bld_base,
1085    struct lp_build_emit_data * emit_data)
1086 {
1087    LLVMValueRef one;
1088    one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
1089    emit_data->output[emit_data->chan] = LLVMBuildFDiv(
1090       bld_base->base.gallivm->builder,
1091       one, emit_data->args[0], "");
1092 }
1093
1094 /* TGSI_OPCODE_DFRAC */
1095 static void dfrac_emit(
1096    const struct lp_build_tgsi_action * action,
1097    struct lp_build_tgsi_context * bld_base,
1098    struct lp_build_emit_data * emit_data)
1099 {
1100    LLVMValueRef tmp;
1101    tmp = lp_build_floor(&bld_base->dbl_bld,
1102                         emit_data->args[0]);
1103    emit_data->output[emit_data->chan] =  LLVMBuildFSub(bld_base->base.gallivm->builder,
1104                                                        emit_data->args[0], tmp, "");
1105 }
1106
1107 static void
1108 u64mul_emit(
1109    const struct lp_build_tgsi_action * action,
1110    struct lp_build_tgsi_context * bld_base,
1111    struct lp_build_emit_data * emit_data)
1112 {
1113    emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
1114                                    emit_data->args[0], emit_data->args[1]);
1115 }
1116
1117 static void
1118 u64mod_emit_cpu(
1119    const struct lp_build_tgsi_action * action,
1120    struct lp_build_tgsi_context * bld_base,
1121    struct lp_build_emit_data * emit_data)
1122 {
1123    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1124    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
1125                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1126                                         bld_base->uint64_bld.zero);
1127    /* We want to make sure that we never divide/mod by zero to not
1128     * generate sigfpe. We don't want to crash just because the
1129     * shader is doing something weird. */
1130    LLVMValueRef divisor = LLVMBuildOr(builder,
1131                                       div_mask,
1132                                       emit_data->args[1], "");
1133    LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
1134                                       emit_data->args[0], divisor);
1135    /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1136    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1137                                                     div_mask,
1138                                                     result, "");
1139 }
1140
1141 static void
1142 i64mod_emit_cpu(
1143    const struct lp_build_tgsi_action * action,
1144    struct lp_build_tgsi_context * bld_base,
1145    struct lp_build_emit_data * emit_data)
1146 {
1147    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1148    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
1149                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1150                                         bld_base->uint64_bld.zero);
1151    /* We want to make sure that we never divide/mod by zero to not
1152     * generate sigfpe. We don't want to crash just because the
1153     * shader is doing something weird. */
1154    LLVMValueRef divisor = LLVMBuildOr(builder,
1155                                       div_mask,
1156                                       emit_data->args[1], "");
1157    LLVMValueRef result = lp_build_mod(&bld_base->int64_bld,
1158                                       emit_data->args[0], divisor);
1159    /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1160    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1161                                                     div_mask,
1162                                                     result, "");
1163 }
1164
1165 static void
1166 u64div_emit_cpu(
1167    const struct lp_build_tgsi_action * action,
1168    struct lp_build_tgsi_context * bld_base,
1169    struct lp_build_emit_data * emit_data)
1170 {
1171
1172    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1173    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
1174                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1175                                         bld_base->uint64_bld.zero);
1176    /* We want to make sure that we never divide/mod by zero to not
1177     * generate sigfpe. We don't want to crash just because the
1178     * shader is doing something weird. */
1179    LLVMValueRef divisor = LLVMBuildOr(builder,
1180                                       div_mask,
1181                                       emit_data->args[1], "");
1182    LLVMValueRef result = LLVMBuildUDiv(builder,
1183                                        emit_data->args[0], divisor, "");
1184    /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1185    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1186                                                     div_mask,
1187                                                     result, "");
1188 }
1189
1190 static void
1191 i64div_emit_cpu(
1192    const struct lp_build_tgsi_action * action,
1193    struct lp_build_tgsi_context * bld_base,
1194    struct lp_build_emit_data * emit_data)
1195 {
1196
1197    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1198    LLVMValueRef div_mask = lp_build_cmp(&bld_base->int64_bld,
1199                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1200                                         bld_base->int64_bld.zero);
1201    /* We want to make sure that we never divide/mod by zero to not
1202     * generate sigfpe. We don't want to crash just because the
1203     * shader is doing something weird. */
1204    LLVMValueRef divisor = LLVMBuildOr(builder,
1205                                       div_mask,
1206                                       emit_data->args[1], "");
1207    LLVMValueRef result = LLVMBuildSDiv(builder,
1208                                        emit_data->args[0], divisor, "");
1209    /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
1210    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1211                                                     div_mask,
1212                                                     result, "");
1213 }
1214
1215 static void
1216 f2u64_emit(
1217    const struct lp_build_tgsi_action * action,
1218    struct lp_build_tgsi_context * bld_base,
1219    struct lp_build_emit_data * emit_data)
1220 {
1221    emit_data->output[emit_data->chan] =
1222       LLVMBuildFPToUI(bld_base->base.gallivm->builder,
1223                       emit_data->args[0],
1224                       bld_base->uint64_bld.vec_type, "");
1225 }
1226
1227 static void
1228 f2i64_emit(
1229    const struct lp_build_tgsi_action * action,
1230    struct lp_build_tgsi_context * bld_base,
1231    struct lp_build_emit_data * emit_data)
1232 {
1233    emit_data->output[emit_data->chan] =
1234       LLVMBuildFPToSI(bld_base->base.gallivm->builder,
1235                       emit_data->args[0],
1236                       bld_base->int64_bld.vec_type, "");
1237 }
1238
1239 static void
1240 u2i64_emit(
1241    const struct lp_build_tgsi_action * action,
1242    struct lp_build_tgsi_context * bld_base,
1243    struct lp_build_emit_data * emit_data)
1244 {
1245    emit_data->output[emit_data->chan] =
1246       LLVMBuildZExt(bld_base->base.gallivm->builder,
1247                       emit_data->args[0],
1248                       bld_base->uint64_bld.vec_type, "");
1249 }
1250
1251 static void
1252 i2i64_emit(
1253    const struct lp_build_tgsi_action * action,
1254    struct lp_build_tgsi_context * bld_base,
1255    struct lp_build_emit_data * emit_data)
1256 {
1257    emit_data->output[emit_data->chan] =
1258       LLVMBuildSExt(bld_base->base.gallivm->builder,
1259                       emit_data->args[0],
1260                       bld_base->int64_bld.vec_type, "");
1261 }
1262
1263 static void
1264 i642f_emit(
1265    const struct lp_build_tgsi_action * action,
1266    struct lp_build_tgsi_context * bld_base,
1267    struct lp_build_emit_data * emit_data)
1268 {
1269    emit_data->output[emit_data->chan] =
1270       LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1271                       emit_data->args[0],
1272                       bld_base->base.vec_type, "");
1273 }
1274
1275 static void
1276 u642f_emit(
1277    const struct lp_build_tgsi_action * action,
1278    struct lp_build_tgsi_context * bld_base,
1279    struct lp_build_emit_data * emit_data)
1280 {
1281    emit_data->output[emit_data->chan] =
1282       LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1283                       emit_data->args[0],
1284                       bld_base->base.vec_type, "");
1285 }
1286
1287 static void
1288 i642d_emit(
1289    const struct lp_build_tgsi_action * action,
1290    struct lp_build_tgsi_context * bld_base,
1291    struct lp_build_emit_data * emit_data)
1292 {
1293    emit_data->output[emit_data->chan] =
1294       LLVMBuildSIToFP(bld_base->base.gallivm->builder,
1295                       emit_data->args[0],
1296                       bld_base->dbl_bld.vec_type, "");
1297 }
1298
1299 static void
1300 u642d_emit(
1301    const struct lp_build_tgsi_action * action,
1302    struct lp_build_tgsi_context * bld_base,
1303    struct lp_build_emit_data * emit_data)
1304 {
1305    emit_data->output[emit_data->chan] =
1306       LLVMBuildUIToFP(bld_base->base.gallivm->builder,
1307                       emit_data->args[0],
1308                       bld_base->dbl_bld.vec_type, "");
1309 }
1310
1311 void
1312 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
1313 {
1314    bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action;
1315    bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action;
1316    bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action;
1317    bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action;
1318    bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action;
1319    bld_base->op_actions[TGSI_OPCODE_DST] = dst_action;
1320    bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
1321    bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
1322    bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
1323    bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
1324    bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
1325    bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
1326    bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
1327    bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
1328    bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
1329    bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
1330
1331    bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
1332    bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
1333    bld_base->op_actions[TGSI_OPCODE_CASE].fetch_args = scalar_unary_fetch_args;
1334    bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args;
1335    bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
1336    bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
1337    bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
1338    bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
1339    bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
1340    bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
1341    bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
1342    bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
1343
1344    bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit;
1345    bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit;
1346    bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit;
1347    bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit;
1348    bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit;
1349    bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit;
1350    bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit;
1351    bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit;
1352    bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
1353    bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
1354    bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
1355    bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
1356
1357    bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
1358    bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
1359    bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
1360    bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
1361    bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
1362    bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit;
1363    bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit;
1364
1365    bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
1366    bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
1367
1368    bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
1369    bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
1370    bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
1371    bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
1372
1373    bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
1374    bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
1375    bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
1376
1377    bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
1378    bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
1379    bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
1380
1381    bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
1382
1383    bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
1384    bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
1385
1386    bld_base->op_actions[TGSI_OPCODE_U64MUL].emit = u64mul_emit;
1387
1388    bld_base->op_actions[TGSI_OPCODE_F2I64].emit = f2i64_emit;
1389    bld_base->op_actions[TGSI_OPCODE_F2U64].emit = f2u64_emit;
1390
1391    bld_base->op_actions[TGSI_OPCODE_D2I64].emit = f2i64_emit;
1392    bld_base->op_actions[TGSI_OPCODE_D2U64].emit = f2u64_emit;
1393
1394    bld_base->op_actions[TGSI_OPCODE_I2I64].emit = i2i64_emit;
1395    bld_base->op_actions[TGSI_OPCODE_U2I64].emit = u2i64_emit;
1396
1397    bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1398    bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1399
1400    bld_base->op_actions[TGSI_OPCODE_I642F].emit = i642f_emit;
1401    bld_base->op_actions[TGSI_OPCODE_U642F].emit = u642f_emit;
1402
1403    bld_base->op_actions[TGSI_OPCODE_I642D].emit = i642d_emit;
1404    bld_base->op_actions[TGSI_OPCODE_U642D].emit = u642d_emit;
1405
1406 }
1407
1408 /* CPU Only default actions */
1409
1410 /* These actions are CPU only, because they could potentially output SSE
1411  * intrinsics.
1412  */
1413
1414 /* TGSI_OPCODE_ADD (CPU Only) */
1415 static void
1416 add_emit_cpu(
1417    const struct lp_build_tgsi_action * action,
1418    struct lp_build_tgsi_context * bld_base,
1419    struct lp_build_emit_data * emit_data)
1420 {
1421    emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base,
1422                                    emit_data->args[0], emit_data->args[1]);
1423 }
1424
1425 /* TGSI_OPCODE_AND (CPU Only) */
1426 static void
1427 and_emit_cpu(
1428    const struct lp_build_tgsi_action * action,
1429    struct lp_build_tgsi_context * bld_base,
1430    struct lp_build_emit_data * emit_data)
1431 {
1432    emit_data->output[emit_data->chan] = lp_build_and(&bld_base->uint_bld,
1433                                    emit_data->args[0], emit_data->args[1]);
1434 }
1435
1436 /* TGSI_OPCODE_ARL (CPU Only) */
1437 static void
1438 arl_emit_cpu(
1439    const struct lp_build_tgsi_action * action,
1440    struct lp_build_tgsi_context * bld_base,
1441    struct lp_build_emit_data * emit_data)
1442 {
1443    LLVMValueRef tmp;
1444    tmp = lp_build_floor(&bld_base->base,
1445                         emit_data->args[0]);
1446    emit_data->output[emit_data->chan] = LLVMBuildFPToSI(bld_base->base.gallivm->builder, tmp,
1447                                                         bld_base->uint_bld.vec_type, "");
1448 }
1449
1450 /* TGSI_OPCODE_ARR (CPU Only) */
1451 static void
1452 arr_emit_cpu(
1453    const struct lp_build_tgsi_action * action,
1454    struct lp_build_tgsi_context * bld_base,
1455    struct lp_build_emit_data * emit_data)
1456 {
1457    emit_data->output[emit_data->chan] = lp_build_iround(&bld_base->base, emit_data->args[0]);
1458 }
1459
1460 /* TGSI_OPCODE_CEIL (CPU Only) */
1461 static void
1462 ceil_emit_cpu(
1463    const struct lp_build_tgsi_action * action,
1464    struct lp_build_tgsi_context * bld_base,
1465    struct lp_build_emit_data * emit_data)
1466 {
1467    emit_data->output[emit_data->chan] = lp_build_ceil(&bld_base->base,
1468                                                       emit_data->args[0]);
1469 }
1470
1471 /* TGSI_OPCODE_CMP (CPU Only) */
1472 static void
1473 cmp_emit_cpu(
1474    const struct lp_build_tgsi_action * action,
1475    struct lp_build_tgsi_context * bld_base,
1476    struct lp_build_emit_data * emit_data)
1477 {
1478    LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS,
1479                                    emit_data->args[0], bld_base->base.zero);
1480    emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1481                                 cond, emit_data->args[1], emit_data->args[2]);
1482 }
1483
1484 /* TGSI_OPCODE_UCMP (CPU Only) */
1485 static void
1486 ucmp_emit_cpu(
1487    const struct lp_build_tgsi_action * action,
1488    struct lp_build_tgsi_context * bld_base,
1489    struct lp_build_emit_data * emit_data)
1490 {
1491    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1492    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1493    LLVMValueRef unsigned_cond =
1494       LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
1495    LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
1496                                     unsigned_cond,
1497                                     uint_bld->zero);
1498    emit_data->output[emit_data->chan] =
1499       lp_build_select(&bld_base->base,
1500                       cond, emit_data->args[1], emit_data->args[2]);
1501 }
1502
1503 /* TGSI_OPCODE_COS (CPU Only) */
1504 static void
1505 cos_emit_cpu(
1506    const struct lp_build_tgsi_action * action,
1507    struct lp_build_tgsi_context * bld_base,
1508    struct lp_build_emit_data * emit_data)
1509 {
1510    emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base,
1511                                                        emit_data->args[0]);
1512 }
1513
1514 /* TGSI_OPCODE_DIV (CPU Only) */
1515 static void
1516 div_emit_cpu(
1517    const struct lp_build_tgsi_action * action,
1518    struct lp_build_tgsi_context * bld_base,
1519    struct lp_build_emit_data * emit_data)
1520 {
1521    emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base,
1522                                    emit_data->args[0], emit_data->args[1]);
1523 }
1524
1525 /* TGSI_OPCODE_EX2 (CPU Only) */
1526 static void
1527 ex2_emit_cpu(
1528    const struct lp_build_tgsi_action * action,
1529    struct lp_build_tgsi_context * bld_base,
1530    struct lp_build_emit_data * emit_data)
1531 {
1532    emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base,
1533                                                         emit_data->args[0]);
1534 }
1535
1536 /* TGSI_OPCODE_F2I (CPU Only) */
1537 static void
1538 f2i_emit_cpu(
1539    const struct lp_build_tgsi_action * action,
1540    struct lp_build_tgsi_context * bld_base,
1541    struct lp_build_emit_data * emit_data)
1542 {
1543    emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
1544                                                         emit_data->args[0]);
1545 }
1546
1547 /* TGSI_OPCODE_FSET Helper (CPU Only) */
1548 static void
1549 fset_emit_cpu(
1550    const struct lp_build_tgsi_action * action,
1551    struct lp_build_tgsi_context * bld_base,
1552    struct lp_build_emit_data * emit_data,
1553    unsigned pipe_func)
1554 {
1555    LLVMValueRef cond;
1556
1557    if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1558       cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1559                                   emit_data->args[0], emit_data->args[1]);
1560    }
1561    else {
1562       cond = lp_build_cmp(&bld_base->base, pipe_func,
1563                           emit_data->args[0], emit_data->args[1]);
1564
1565    }
1566    emit_data->output[emit_data->chan] = cond;
1567 }
1568
1569
1570 /* TGSI_OPCODE_FSEQ (CPU Only) */
1571 static void
1572 fseq_emit_cpu(
1573    const struct lp_build_tgsi_action * action,
1574    struct lp_build_tgsi_context * bld_base,
1575    struct lp_build_emit_data * emit_data)
1576 {
1577    fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1578 }
1579
1580 /* TGSI_OPCODE_ISGE (CPU Only) */
1581 static void
1582 fsge_emit_cpu(
1583    const struct lp_build_tgsi_action * action,
1584    struct lp_build_tgsi_context * bld_base,
1585    struct lp_build_emit_data * emit_data)
1586 {
1587    fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1588 }
1589
1590 /* TGSI_OPCODE_ISLT (CPU Only) */
1591 static void
1592 fslt_emit_cpu(
1593    const struct lp_build_tgsi_action * action,
1594    struct lp_build_tgsi_context * bld_base,
1595    struct lp_build_emit_data * emit_data)
1596 {
1597    fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1598 }
1599
1600 /* TGSI_OPCODE_USNE (CPU Only) */
1601
1602 static void
1603 fsne_emit_cpu(
1604    const struct lp_build_tgsi_action * action,
1605    struct lp_build_tgsi_context * bld_base,
1606    struct lp_build_emit_data * emit_data)
1607 {
1608    fset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
1609 }
1610
1611 /* TGSI_OPCODE_FLR (CPU Only) */
1612
1613 static void
1614 flr_emit_cpu(
1615    const struct lp_build_tgsi_action * action,
1616    struct lp_build_tgsi_context * bld_base,
1617    struct lp_build_emit_data * emit_data)
1618 {
1619    emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base,
1620                                                          emit_data->args[0]);
1621 }
1622
1623 /* TGSI_OPCODE_I2F (CPU Only) */
1624 static void
1625 i2f_emit_cpu(
1626    const struct lp_build_tgsi_action * action,
1627    struct lp_build_tgsi_context * bld_base,
1628    struct lp_build_emit_data * emit_data)
1629 {
1630    emit_data->output[emit_data->chan] = lp_build_int_to_float(&bld_base->base,
1631                                                               emit_data->args[0]);
1632 }
1633
1634 /* TGSI_OPCODE_IABS (CPU Only) */
1635 static void
1636 iabs_emit_cpu(
1637    const struct lp_build_tgsi_action * action,
1638    struct lp_build_tgsi_context * bld_base,
1639    struct lp_build_emit_data * emit_data)
1640 {
1641    emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
1642                                                        emit_data->args[0]);
1643 }
1644
1645 /* TGSI_OPCODE_IDIV (CPU Only) */
1646 static void
1647 idiv_emit_cpu(
1648    const struct lp_build_tgsi_action * action,
1649    struct lp_build_tgsi_context * bld_base,
1650    struct lp_build_emit_data * emit_data)
1651 {
1652    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1653    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1654                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1655                                         bld_base->uint_bld.zero);
1656    /* We want to make sure that we never divide/mod by zero to not
1657     * generate sigfpe. We don't want to crash just because the
1658     * shader is doing something weird. */
1659    LLVMValueRef divisor = LLVMBuildOr(builder,
1660                                       div_mask,
1661                                       emit_data->args[1], "");
1662    LLVMValueRef result = lp_build_div(&bld_base->int_bld,
1663                                       emit_data->args[0], divisor);
1664    LLVMValueRef not_div_mask = LLVMBuildNot(builder,
1665                                             div_mask,"");
1666    /* idiv by zero doesn't have a guaranteed return value chose 0 for now. */
1667    emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1668                                                      not_div_mask,
1669                                                      result, "");
1670 }
1671
1672 /* TGSI_OPCODE_INEG (CPU Only) */
1673 static void
1674 ineg_emit_cpu(
1675    const struct lp_build_tgsi_action * action,
1676    struct lp_build_tgsi_context * bld_base,
1677    struct lp_build_emit_data * emit_data)
1678 {
1679    emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int_bld,
1680                                                      bld_base->int_bld.zero,
1681                                                      emit_data->args[0]);
1682 }
1683
1684 /* TGSI_OPCODE_ISET Helper (CPU Only) */
1685 static void
1686 iset_emit_cpu(
1687    const struct lp_build_tgsi_action * action,
1688    struct lp_build_tgsi_context * bld_base,
1689    struct lp_build_emit_data * emit_data,
1690    unsigned pipe_func)
1691 {
1692    LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
1693                                     emit_data->args[0], emit_data->args[1]);
1694    emit_data->output[emit_data->chan] = cond;
1695 }
1696
1697 /* TGSI_OPCODE_IMAX (CPU Only) */
1698 static void
1699 imax_emit_cpu(
1700    const struct lp_build_tgsi_action * action,
1701    struct lp_build_tgsi_context * bld_base,
1702    struct lp_build_emit_data * emit_data)
1703 {
1704    emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int_bld,
1705                                    emit_data->args[0], emit_data->args[1]);
1706 }
1707
1708 /* TGSI_OPCODE_IMIN (CPU Only) */
1709 static void
1710 imin_emit_cpu(
1711    const struct lp_build_tgsi_action * action,
1712    struct lp_build_tgsi_context * bld_base,
1713    struct lp_build_emit_data * emit_data)
1714 {
1715    emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int_bld,
1716                                    emit_data->args[0], emit_data->args[1]);
1717 }
1718
1719 /* TGSI_OPCODE_ISGE (CPU Only) */
1720 static void
1721 isge_emit_cpu(
1722    const struct lp_build_tgsi_action * action,
1723    struct lp_build_tgsi_context * bld_base,
1724    struct lp_build_emit_data * emit_data)
1725 {
1726    iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1727 }
1728
1729 /* TGSI_OPCODE_ISHR (CPU Only) */
1730 static void
1731 ishr_emit_cpu(
1732    const struct lp_build_tgsi_action * action,
1733    struct lp_build_tgsi_context * bld_base,
1734    struct lp_build_emit_data * emit_data)
1735 {
1736    struct lp_build_context *int_bld = &bld_base->int_bld;
1737    LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
1738                                           int_bld->type.width - 1);
1739    LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
1740    emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
1741                                                      masked_count);
1742 }
1743
1744 /* TGSI_OPCODE_ISLT (CPU Only) */
1745 static void
1746 islt_emit_cpu(
1747    const struct lp_build_tgsi_action * action,
1748    struct lp_build_tgsi_context * bld_base,
1749    struct lp_build_emit_data * emit_data)
1750 {
1751    iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
1752 }
1753
1754
1755 /* TGSI_OPCODE_ISSG (CPU Only) */
1756 static void
1757 issg_emit_cpu(
1758    const struct lp_build_tgsi_action * action,
1759    struct lp_build_tgsi_context * bld_base,
1760    struct lp_build_emit_data * emit_data)
1761 {
1762    emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
1763                                                        emit_data->args[0]);
1764 }
1765
1766 /* TGSI_OPCODE_LG2 (CPU Only) */
1767 static void
1768 lg2_emit_cpu(
1769    const struct lp_build_tgsi_action * action,
1770    struct lp_build_tgsi_context * bld_base,
1771    struct lp_build_emit_data * emit_data)
1772 {
1773    emit_data->output[emit_data->chan] = lp_build_log2_safe(&bld_base->base,
1774                                                            emit_data->args[0]);
1775 }
1776
1777 /* TGSI_OPCODE_LOG (CPU Only) */
1778 static void
1779 log_emit_cpu(
1780    const struct lp_build_tgsi_action * action,
1781    struct lp_build_tgsi_context * bld_base,
1782    struct lp_build_emit_data * emit_data)
1783 {
1784    LLVMValueRef p_floor_log2;
1785    LLVMValueRef p_exp;
1786    LLVMValueRef p_log2;
1787    LLVMValueRef src0 = emit_data->args[0];
1788
1789    lp_build_log2_approx(&bld_base->base, src0,
1790                         &p_exp, &p_floor_log2, &p_log2, FALSE);
1791
1792    emit_data->output[TGSI_CHAN_X] = p_floor_log2;
1793
1794    emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base,
1795                                              TGSI_OPCODE_DIV,
1796                                              src0, p_exp);
1797    emit_data->output[TGSI_CHAN_Z] = p_log2;
1798
1799    emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
1800
1801 }
1802
1803 /* TGSI_OPCODE_MAD (CPU Only) */
1804
1805 static void
1806 mad_emit_cpu(
1807    const struct lp_build_tgsi_action * action,
1808    struct lp_build_tgsi_context * bld_base,
1809    struct lp_build_emit_data * emit_data)
1810 {
1811    emit_data->output[emit_data->chan] =
1812       lp_build_mad(&bld_base->base,
1813                    emit_data->args[0], emit_data->args[1], emit_data->args[2]);
1814 }
1815
1816 /* TGSI_OPCODE_MAX (CPU Only) */
1817
1818 static void
1819 max_emit_cpu(
1820    const struct lp_build_tgsi_action * action,
1821    struct lp_build_tgsi_context * bld_base,
1822    struct lp_build_emit_data * emit_data)
1823 {
1824    emit_data->output[emit_data->chan] =
1825       lp_build_max_ext(&bld_base->base,
1826                        emit_data->args[0], emit_data->args[1],
1827                        GALLIVM_NAN_RETURN_OTHER);
1828 }
1829
1830 /* TGSI_OPCODE_MIN (CPU Only) */
1831 static void
1832 min_emit_cpu(
1833    const struct lp_build_tgsi_action * action,
1834    struct lp_build_tgsi_context * bld_base,
1835    struct lp_build_emit_data * emit_data)
1836 {
1837    emit_data->output[emit_data->chan] =
1838       lp_build_min_ext(&bld_base->base,
1839                        emit_data->args[0], emit_data->args[1],
1840                        GALLIVM_NAN_RETURN_OTHER);
1841 }
1842
1843 /* TGSI_OPCODE_MOD (CPU Only) */
1844 static void
1845 mod_emit_cpu(
1846    const struct lp_build_tgsi_action * action,
1847    struct lp_build_tgsi_context * bld_base,
1848    struct lp_build_emit_data * emit_data)
1849 {
1850    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1851    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
1852                                         PIPE_FUNC_EQUAL, emit_data->args[1],
1853                                         bld_base->uint_bld.zero);
1854    /* We want to make sure that we never divide/mod by zero to not
1855     * generate sigfpe. We don't want to crash just because the
1856     * shader is doing something weird. */
1857    LLVMValueRef divisor = LLVMBuildOr(builder,
1858                                       div_mask,
1859                                       emit_data->args[1], "");
1860    LLVMValueRef result = lp_build_mod(&bld_base->int_bld,
1861                                       emit_data->args[0], divisor);
1862    /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
1863    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1864                                                     div_mask,
1865                                                     result, "");
1866 }
1867
1868 /* TGSI_OPCODE_NOT */
1869 static void
1870 not_emit_cpu(
1871    const struct lp_build_tgsi_action * action,
1872    struct lp_build_tgsi_context * bld_base,
1873    struct lp_build_emit_data * emit_data)
1874 {
1875    emit_data->output[emit_data->chan] = lp_build_not(&bld_base->uint_bld,
1876                                                      emit_data->args[0]);
1877 }
1878
1879 /* TGSI_OPCODE_OR (CPU Only) */
1880 static void
1881 or_emit_cpu(
1882    const struct lp_build_tgsi_action * action,
1883    struct lp_build_tgsi_context * bld_base,
1884    struct lp_build_emit_data * emit_data)
1885 {
1886    emit_data->output[emit_data->chan] = lp_build_or(&bld_base->uint_bld,
1887                                    emit_data->args[0], emit_data->args[1]);
1888 }
1889
1890 /* TGSI_OPCODE_POW (CPU Only) */
1891 static void
1892 pow_emit_cpu(
1893    const struct lp_build_tgsi_action * action,
1894    struct lp_build_tgsi_context * bld_base,
1895    struct lp_build_emit_data * emit_data)
1896 {
1897    emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base,
1898                                    emit_data->args[0], emit_data->args[1]);
1899 }
1900
1901
1902 /* TGSI_OPCODE_RCP (CPU Only) */
1903
1904 static void
1905 rcp_emit_cpu(
1906    const struct lp_build_tgsi_action * action,
1907    struct lp_build_tgsi_context * bld_base,
1908    struct lp_build_emit_data * emit_data)
1909 {
1910    emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base,
1911                                                        emit_data->args[0]);
1912 }
1913
1914 /* Reciprical squareroot (CPU Only) */
1915 static void
1916 recip_sqrt_emit_cpu(
1917    const struct lp_build_tgsi_action * action,
1918    struct lp_build_tgsi_context * bld_base,
1919    struct lp_build_emit_data * emit_data)
1920 {
1921    emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base,
1922                                                          emit_data->args[0]);
1923 }
1924
1925 static void
1926 sqrt_emit_cpu(
1927    const struct lp_build_tgsi_action * action,
1928    struct lp_build_tgsi_context * bld_base,
1929    struct lp_build_emit_data * emit_data)
1930 {
1931    emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->base,
1932                                                       emit_data->args[0]);
1933 }
1934
1935
1936 /* TGSI_OPCODE_ROUND (CPU Only) */
1937 static void
1938 round_emit_cpu(
1939    const struct lp_build_tgsi_action * action,
1940    struct lp_build_tgsi_context * bld_base,
1941    struct lp_build_emit_data * emit_data)
1942 {
1943    emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base,
1944                                                          emit_data->args[0]);
1945 }
1946
1947 /* TGSI_OPCODE_SET Helper (CPU Only) */
1948
1949 static void
1950 set_emit_cpu(
1951    const struct lp_build_tgsi_action * action,
1952    struct lp_build_tgsi_context * bld_base,
1953    struct lp_build_emit_data * emit_data,
1954    unsigned pipe_func)
1955 {
1956    LLVMValueRef cond;
1957
1958    if (pipe_func != PIPE_FUNC_NOTEQUAL) {
1959       cond = lp_build_cmp_ordered(&bld_base->base, pipe_func,
1960                                   emit_data->args[0], emit_data->args[1]);
1961    }
1962    else {
1963       cond = lp_build_cmp(&bld_base->base, pipe_func,
1964                           emit_data->args[0], emit_data->args[1]);
1965
1966    }
1967    emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base,
1968                                           cond,
1969                                           bld_base->base.one,
1970                                           bld_base->base.zero);
1971 }
1972
1973 /* TGSI_OPCODE_SEQ (CPU Only) */
1974
1975 static void
1976 seq_emit_cpu(
1977    const struct lp_build_tgsi_action * action,
1978    struct lp_build_tgsi_context * bld_base,
1979    struct lp_build_emit_data * emit_data)
1980 {
1981    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
1982 }
1983
1984 /* TGSI_OPCODE_SGE (CPU Only) */
1985 static void
1986 sge_emit_cpu(
1987    const struct lp_build_tgsi_action * action,
1988    struct lp_build_tgsi_context * bld_base,
1989    struct lp_build_emit_data * emit_data)
1990 {
1991    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
1992 }
1993
1994 /* TGSI_OPCODE_SGT (CPU Only)*/
1995
1996 static void
1997 sgt_emit_cpu(
1998    const struct lp_build_tgsi_action * action,
1999    struct lp_build_tgsi_context * bld_base,
2000    struct lp_build_emit_data * emit_data)
2001 {
2002    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER);
2003 }
2004
2005 /* TGSI_OPCODE_SHL (CPU Only) */
2006 static void
2007 shl_emit_cpu(
2008    const struct lp_build_tgsi_action * action,
2009    struct lp_build_tgsi_context * bld_base,
2010    struct lp_build_emit_data * emit_data)
2011 {
2012    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2013    LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2014                                           uint_bld->type.width - 1);
2015    LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2016    emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
2017                                                      masked_count);
2018 }
2019
2020 /* TGSI_OPCODE_SIN (CPU Only) */
2021 static void
2022 sin_emit_cpu(
2023    const struct lp_build_tgsi_action * action,
2024    struct lp_build_tgsi_context * bld_base,
2025    struct lp_build_emit_data * emit_data)
2026 {
2027    emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base,
2028                                                        emit_data->args[0]);
2029 }
2030
2031 /* TGSI_OPCODE_SLE (CPU Only) */
2032 static void
2033 sle_emit_cpu(
2034    const struct lp_build_tgsi_action * action,
2035    struct lp_build_tgsi_context * bld_base,
2036    struct lp_build_emit_data * emit_data)
2037 {
2038    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL);
2039 }
2040
2041 /* TGSI_OPCODE_SLT (CPU Only) */
2042 static void
2043 slt_emit_cpu(
2044    const struct lp_build_tgsi_action * action,
2045    struct lp_build_tgsi_context * bld_base,
2046    struct lp_build_emit_data * emit_data)
2047 {
2048    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2049 }
2050
2051 /* TGSI_OPCODE_SNE (CPU Only) */
2052
2053 static void
2054 sne_emit_cpu(
2055    const struct lp_build_tgsi_action * action,
2056    struct lp_build_tgsi_context * bld_base,
2057    struct lp_build_emit_data * emit_data)
2058 {
2059    set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2060 }
2061
2062 /* TGSI_OPCODE_SSG (CPU Only) */
2063
2064 static void
2065 ssg_emit_cpu(
2066    const struct lp_build_tgsi_action * action,
2067    struct lp_build_tgsi_context * bld_base,
2068    struct lp_build_emit_data * emit_data)
2069 {
2070    emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base,
2071                                                        emit_data->args[0]);
2072 }
2073
2074 /* TGSI_OPCODE_SUB (CPU Only) */
2075
2076 static void
2077 sub_emit_cpu(
2078    const struct lp_build_tgsi_action * action,
2079    struct lp_build_tgsi_context * bld_base,
2080    struct lp_build_emit_data * emit_data)
2081 {
2082    emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base,
2083                                                         emit_data->args[0],
2084                                                         emit_data->args[1]);
2085 }
2086
2087 /* TGSI_OPCODE_TRUNC (CPU Only) */
2088
2089 static void
2090 trunc_emit_cpu(
2091    const struct lp_build_tgsi_action * action,
2092    struct lp_build_tgsi_context * bld_base,
2093    struct lp_build_emit_data * emit_data)
2094 {
2095    emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base,
2096                                                          emit_data->args[0]);
2097 }
2098
2099 /* TGSI_OPCODE_UADD (CPU Only) */
2100 static void
2101 uadd_emit_cpu(
2102    const struct lp_build_tgsi_action * action,
2103    struct lp_build_tgsi_context * bld_base,
2104    struct lp_build_emit_data * emit_data)
2105 {
2106    emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint_bld,
2107                                    emit_data->args[0], emit_data->args[1]);
2108 }
2109
2110 /* TGSI_OPCODE_UDIV (CPU Only) */
2111 static void
2112 udiv_emit_cpu(
2113    const struct lp_build_tgsi_action * action,
2114    struct lp_build_tgsi_context * bld_base,
2115    struct lp_build_emit_data * emit_data)
2116 {
2117
2118    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2119    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
2120                                         PIPE_FUNC_EQUAL, emit_data->args[1],
2121                                         bld_base->uint_bld.zero);
2122    /* We want to make sure that we never divide/mod by zero to not
2123     * generate sigfpe. We don't want to crash just because the
2124     * shader is doing something weird. */
2125    LLVMValueRef divisor = LLVMBuildOr(builder,
2126                                       div_mask,
2127                                       emit_data->args[1], "");
2128    LLVMValueRef result = lp_build_div(&bld_base->uint_bld,
2129                                       emit_data->args[0], divisor);
2130    /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10 */
2131    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
2132                                                     div_mask,
2133                                                     result, "");
2134 }
2135
2136 /* TGSI_OPCODE_UMAX (CPU Only) */
2137 static void
2138 umax_emit_cpu(
2139    const struct lp_build_tgsi_action * action,
2140    struct lp_build_tgsi_context * bld_base,
2141    struct lp_build_emit_data * emit_data)
2142 {
2143    emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint_bld,
2144                                    emit_data->args[0], emit_data->args[1]);
2145 }
2146
2147 /* TGSI_OPCODE_UMIN (CPU Only) */
2148 static void
2149 umin_emit_cpu(
2150    const struct lp_build_tgsi_action * action,
2151    struct lp_build_tgsi_context * bld_base,
2152    struct lp_build_emit_data * emit_data)
2153 {
2154    emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint_bld,
2155                                    emit_data->args[0], emit_data->args[1]);
2156 }
2157
2158 /* TGSI_OPCODE_UMOD (CPU Only) */
2159 static void
2160 umod_emit_cpu(
2161    const struct lp_build_tgsi_action * action,
2162    struct lp_build_tgsi_context * bld_base,
2163    struct lp_build_emit_data * emit_data)
2164 {
2165    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2166    LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint_bld,
2167                                         PIPE_FUNC_EQUAL, emit_data->args[1],
2168                                         bld_base->uint_bld.zero);
2169    /* We want to make sure that we never divide/mod by zero to not
2170     * generate sigfpe. We don't want to crash just because the
2171     * shader is doing something weird. */
2172    LLVMValueRef divisor = LLVMBuildOr(builder,
2173                                       div_mask,
2174                                       emit_data->args[1], "");
2175    LLVMValueRef result = lp_build_mod(&bld_base->uint_bld,
2176                                       emit_data->args[0], divisor);
2177    /* umod by zero is guaranteed to return 0xffffffff */
2178    emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
2179                                                     div_mask,
2180                                                     result, "");
2181 }
2182
2183 /* TGSI_OPCODE_USET Helper (CPU Only) */
2184 static void
2185 uset_emit_cpu(
2186    const struct lp_build_tgsi_action * action,
2187    struct lp_build_tgsi_context * bld_base,
2188    struct lp_build_emit_data * emit_data,
2189    unsigned pipe_func)
2190 {
2191    LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
2192                                     emit_data->args[0], emit_data->args[1]);
2193    emit_data->output[emit_data->chan] = cond;
2194 }
2195
2196
2197 /* TGSI_OPCODE_USEQ (CPU Only) */
2198 static void
2199 useq_emit_cpu(
2200    const struct lp_build_tgsi_action * action,
2201    struct lp_build_tgsi_context * bld_base,
2202    struct lp_build_emit_data * emit_data)
2203 {
2204    uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2205 }
2206
2207 /* TGSI_OPCODE_ISGE (CPU Only) */
2208 static void
2209 usge_emit_cpu(
2210    const struct lp_build_tgsi_action * action,
2211    struct lp_build_tgsi_context * bld_base,
2212    struct lp_build_emit_data * emit_data)
2213 {
2214    uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2215 }
2216
2217 /* TGSI_OPCODE_USHR (CPU Only) */
2218 static void
2219 ushr_emit_cpu(
2220    const struct lp_build_tgsi_action * action,
2221    struct lp_build_tgsi_context * bld_base,
2222    struct lp_build_emit_data * emit_data)
2223 {
2224    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2225    LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2226                                           uint_bld->type.width - 1);
2227    LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2228    emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2229                                                      masked_count);
2230 }
2231
2232 /* TGSI_OPCODE_ISLT (CPU Only) */
2233 static void
2234 uslt_emit_cpu(
2235    const struct lp_build_tgsi_action * action,
2236    struct lp_build_tgsi_context * bld_base,
2237    struct lp_build_emit_data * emit_data)
2238 {
2239    uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2240 }
2241
2242 /* TGSI_OPCODE_USNE (CPU Only) */
2243
2244 static void
2245 usne_emit_cpu(
2246    const struct lp_build_tgsi_action * action,
2247    struct lp_build_tgsi_context * bld_base,
2248    struct lp_build_emit_data * emit_data)
2249 {
2250    uset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2251 }
2252
2253 /* TGSI_OPCODE_XOR */
2254 static void
2255 xor_emit_cpu(
2256    const struct lp_build_tgsi_action * action,
2257    struct lp_build_tgsi_context * bld_base,
2258    struct lp_build_emit_data * emit_data)
2259 {
2260    emit_data->output[emit_data->chan] = lp_build_xor(&bld_base->uint_bld,
2261                                                      emit_data->args[0],
2262                                                      emit_data->args[1]);
2263 }
2264
2265 /* TGSI_OPCODE_DABS (CPU Only) */
2266 static void
2267 dabs_emit_cpu(
2268    const struct lp_build_tgsi_action * action,
2269    struct lp_build_tgsi_context * bld_base,
2270    struct lp_build_emit_data * emit_data)
2271 {
2272    emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
2273                                                        emit_data->args[0]);
2274 }
2275
2276 /* TGSI_OPCODE_DNEG (CPU Only) */
2277 static void
2278 dneg_emit_cpu(
2279    const struct lp_build_tgsi_action * action,
2280    struct lp_build_tgsi_context * bld_base,
2281    struct lp_build_emit_data * emit_data)
2282 {
2283    emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
2284                                                      bld_base->dbl_bld.zero,
2285                                                      emit_data->args[0]);
2286 }
2287
2288 /* TGSI_OPCODE_DSET Helper (CPU Only) */
2289 static void
2290 dset_emit_cpu(
2291    const struct lp_build_tgsi_action * action,
2292    struct lp_build_tgsi_context * bld_base,
2293    struct lp_build_emit_data * emit_data,
2294    unsigned pipe_func)
2295 {
2296    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2297    LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
2298                                     emit_data->args[0], emit_data->args[1]);
2299    /* arguments were 64 bit but store as 32 bit */
2300    cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2301    emit_data->output[emit_data->chan] = cond;
2302 }
2303
2304 /* TGSI_OPCODE_DSEQ (CPU Only) */
2305 static void
2306 dseq_emit_cpu(
2307    const struct lp_build_tgsi_action * action,
2308    struct lp_build_tgsi_context * bld_base,
2309    struct lp_build_emit_data * emit_data)
2310 {
2311    dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2312 }
2313
2314 /* TGSI_OPCODE_DSGE (CPU Only) */
2315 static void
2316 dsge_emit_cpu(
2317    const struct lp_build_tgsi_action * action,
2318    struct lp_build_tgsi_context * bld_base,
2319    struct lp_build_emit_data * emit_data)
2320 {
2321    dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2322 }
2323
2324 /* TGSI_OPCODE_DSLT (CPU Only) */
2325 static void
2326 dslt_emit_cpu(
2327    const struct lp_build_tgsi_action * action,
2328    struct lp_build_tgsi_context * bld_base,
2329    struct lp_build_emit_data * emit_data)
2330 {
2331    dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2332 }
2333
2334 /* TGSI_OPCODE_DSNE (CPU Only) */
2335 static void
2336 dsne_emit_cpu(
2337    const struct lp_build_tgsi_action * action,
2338    struct lp_build_tgsi_context * bld_base,
2339    struct lp_build_emit_data * emit_data)
2340 {
2341    dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2342 }
2343
2344 /* Double Reciprocal squareroot (CPU Only) */
2345 static void
2346 drecip_sqrt_emit_cpu(
2347    const struct lp_build_tgsi_action * action,
2348    struct lp_build_tgsi_context * bld_base,
2349    struct lp_build_emit_data * emit_data)
2350 {
2351    emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
2352                                                          emit_data->args[0]);
2353 }
2354
2355 /* Double Squareroot (CPU Only) */
2356 static void
2357 dsqrt_emit_cpu(
2358    const struct lp_build_tgsi_action * action,
2359    struct lp_build_tgsi_context * bld_base,
2360    struct lp_build_emit_data * emit_data)
2361 {
2362    emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
2363                                                       emit_data->args[0]);
2364 }
2365
2366 static void
2367 i64abs_emit_cpu(
2368    const struct lp_build_tgsi_action * action,
2369    struct lp_build_tgsi_context * bld_base,
2370    struct lp_build_emit_data * emit_data)
2371 {
2372    emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int64_bld,
2373                                                        emit_data->args[0]);
2374 }
2375
2376 static void
2377 i64ssg_emit_cpu(
2378    const struct lp_build_tgsi_action * action,
2379    struct lp_build_tgsi_context * bld_base,
2380    struct lp_build_emit_data * emit_data)
2381 {
2382    emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int64_bld,
2383                                                        emit_data->args[0]);
2384 }
2385
2386 static void
2387 i64neg_emit_cpu(
2388    const struct lp_build_tgsi_action * action,
2389    struct lp_build_tgsi_context * bld_base,
2390    struct lp_build_emit_data * emit_data)
2391 {
2392    emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->int64_bld,
2393                                                      bld_base->int64_bld.zero,
2394                                                      emit_data->args[0]);
2395 }
2396
2397 static void
2398 u64set_emit_cpu(
2399    const struct lp_build_tgsi_action * action,
2400    struct lp_build_tgsi_context * bld_base,
2401    struct lp_build_emit_data * emit_data,
2402    unsigned pipe_func)
2403 {
2404    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2405    LLVMValueRef cond = lp_build_cmp(&bld_base->uint64_bld, pipe_func,
2406                                     emit_data->args[0], emit_data->args[1]);
2407    /* arguments were 64 bit but store as 32 bit */
2408    cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2409    emit_data->output[emit_data->chan] = cond;
2410 }
2411
2412 static void
2413 u64seq_emit_cpu(
2414    const struct lp_build_tgsi_action * action,
2415    struct lp_build_tgsi_context * bld_base,
2416    struct lp_build_emit_data * emit_data)
2417 {
2418    u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
2419 }
2420
2421 static void
2422 u64sne_emit_cpu(
2423    const struct lp_build_tgsi_action * action,
2424    struct lp_build_tgsi_context * bld_base,
2425    struct lp_build_emit_data * emit_data)
2426 {
2427    u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
2428 }
2429
2430 static void
2431 u64slt_emit_cpu(
2432    const struct lp_build_tgsi_action * action,
2433    struct lp_build_tgsi_context * bld_base,
2434    struct lp_build_emit_data * emit_data)
2435 {
2436    u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2437 }
2438
2439 static void
2440 u64sge_emit_cpu(
2441    const struct lp_build_tgsi_action * action,
2442    struct lp_build_tgsi_context * bld_base,
2443    struct lp_build_emit_data * emit_data)
2444 {
2445    u64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2446 }
2447
2448 static void
2449 i64set_emit_cpu(
2450    const struct lp_build_tgsi_action * action,
2451    struct lp_build_tgsi_context * bld_base,
2452    struct lp_build_emit_data * emit_data,
2453    unsigned pipe_func)
2454 {
2455    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2456    LLVMValueRef cond = lp_build_cmp(&bld_base->int64_bld, pipe_func,
2457                                     emit_data->args[0], emit_data->args[1]);
2458    /* arguments were 64 bit but store as 32 bit */
2459    cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
2460    emit_data->output[emit_data->chan] = cond;
2461 }
2462
2463 static void
2464 i64slt_emit_cpu(
2465    const struct lp_build_tgsi_action * action,
2466    struct lp_build_tgsi_context * bld_base,
2467    struct lp_build_emit_data * emit_data)
2468 {
2469    i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
2470 }
2471
2472 static void
2473 i64sge_emit_cpu(
2474    const struct lp_build_tgsi_action * action,
2475    struct lp_build_tgsi_context * bld_base,
2476    struct lp_build_emit_data * emit_data)
2477 {
2478    i64set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
2479 }
2480
2481 static void
2482 u64max_emit_cpu(
2483    const struct lp_build_tgsi_action * action,
2484    struct lp_build_tgsi_context * bld_base,
2485    struct lp_build_emit_data * emit_data)
2486 {
2487    emit_data->output[emit_data->chan] = lp_build_max(&bld_base->uint64_bld,
2488                                    emit_data->args[0], emit_data->args[1]);
2489 }
2490
2491 static void
2492 u64min_emit_cpu(
2493    const struct lp_build_tgsi_action * action,
2494    struct lp_build_tgsi_context * bld_base,
2495    struct lp_build_emit_data * emit_data)
2496 {
2497    emit_data->output[emit_data->chan] = lp_build_min(&bld_base->uint64_bld,
2498                                    emit_data->args[0], emit_data->args[1]);
2499 }
2500
2501 static void
2502 i64max_emit_cpu(
2503    const struct lp_build_tgsi_action * action,
2504    struct lp_build_tgsi_context * bld_base,
2505    struct lp_build_emit_data * emit_data)
2506 {
2507    emit_data->output[emit_data->chan] = lp_build_max(&bld_base->int64_bld,
2508                                    emit_data->args[0], emit_data->args[1]);
2509 }
2510
2511 static void
2512 i64min_emit_cpu(
2513    const struct lp_build_tgsi_action * action,
2514    struct lp_build_tgsi_context * bld_base,
2515    struct lp_build_emit_data * emit_data)
2516 {
2517    emit_data->output[emit_data->chan] = lp_build_min(&bld_base->int64_bld,
2518                                    emit_data->args[0], emit_data->args[1]);
2519 }
2520
2521 static void
2522 u64add_emit_cpu(
2523    const struct lp_build_tgsi_action * action,
2524    struct lp_build_tgsi_context * bld_base,
2525    struct lp_build_emit_data * emit_data)
2526 {
2527    emit_data->output[emit_data->chan] = lp_build_add(&bld_base->uint64_bld,
2528                                    emit_data->args[0], emit_data->args[1]);
2529 }
2530
2531 static void
2532 u64shl_emit_cpu(
2533    const struct lp_build_tgsi_action * action,
2534    struct lp_build_tgsi_context * bld_base,
2535    struct lp_build_emit_data * emit_data)
2536 {
2537    struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2538    LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2539                                           uint_bld->type.width - 1);
2540    LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2541    emit_data->output[emit_data->chan] = lp_build_shl(uint_bld, emit_data->args[0],
2542                                                      masked_count);
2543 }
2544
2545 static void
2546 i64shr_emit_cpu(
2547    const struct lp_build_tgsi_action * action,
2548    struct lp_build_tgsi_context * bld_base,
2549    struct lp_build_emit_data * emit_data)
2550 {
2551    struct lp_build_context *int_bld = &bld_base->int64_bld;
2552    LLVMValueRef mask = lp_build_const_vec(int_bld->gallivm, int_bld->type,
2553                                           int_bld->type.width - 1);
2554    LLVMValueRef masked_count = lp_build_and(int_bld, emit_data->args[1], mask);
2555    emit_data->output[emit_data->chan] = lp_build_shr(int_bld, emit_data->args[0],
2556                                                      masked_count);
2557 }
2558
2559 static void
2560 u64shr_emit_cpu(
2561    const struct lp_build_tgsi_action * action,
2562    struct lp_build_tgsi_context * bld_base,
2563    struct lp_build_emit_data * emit_data)
2564 {
2565    struct lp_build_context *uint_bld = &bld_base->uint64_bld;
2566    LLVMValueRef mask = lp_build_const_vec(uint_bld->gallivm, uint_bld->type,
2567                                           uint_bld->type.width - 1);
2568    LLVMValueRef masked_count = lp_build_and(uint_bld, emit_data->args[1], mask);
2569    emit_data->output[emit_data->chan] = lp_build_shr(uint_bld, emit_data->args[0],
2570                                                      masked_count);
2571 }
2572
2573 void
2574 lp_set_default_actions_cpu(
2575    struct lp_build_tgsi_context * bld_base)
2576 {
2577    lp_set_default_actions(bld_base);
2578    bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu;
2579    bld_base->op_actions[TGSI_OPCODE_AND].emit = and_emit_cpu;
2580    bld_base->op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
2581    bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
2582    bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
2583    bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
2584    bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
2585    bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
2586    bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
2587    bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
2588    bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
2589    bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
2590    bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;
2591    bld_base->op_actions[TGSI_OPCODE_FSLT].emit = fslt_emit_cpu;
2592    bld_base->op_actions[TGSI_OPCODE_FSNE].emit = fsne_emit_cpu;
2593
2594    bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
2595    bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
2596    bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
2597    bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
2598    bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
2599    bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
2600    bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
2601    bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
2602    bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
2603    bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
2604    bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
2605    bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
2606
2607    bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
2608    bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
2609    bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu;
2610    bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
2611    bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
2612    bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
2613    bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
2614    bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
2615    bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
2616    bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu;
2617    bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu;
2618    bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu;
2619    bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu;
2620    bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu;
2621    bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu;
2622    bld_base->op_actions[TGSI_OPCODE_SHL].emit = shl_emit_cpu;
2623    bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu;
2624    bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu;
2625    bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu;
2626    bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu;
2627    bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu;
2628    bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu;
2629
2630    bld_base->rsq_action.emit = recip_sqrt_emit_cpu;
2631    bld_base->sqrt_action.emit = sqrt_emit_cpu;
2632
2633    bld_base->op_actions[TGSI_OPCODE_UADD].emit = uadd_emit_cpu;
2634    bld_base->op_actions[TGSI_OPCODE_UCMP].emit = ucmp_emit_cpu;
2635    bld_base->op_actions[TGSI_OPCODE_UDIV].emit = udiv_emit_cpu;
2636    bld_base->op_actions[TGSI_OPCODE_UMAX].emit = umax_emit_cpu;
2637    bld_base->op_actions[TGSI_OPCODE_UMIN].emit = umin_emit_cpu;
2638    bld_base->op_actions[TGSI_OPCODE_UMOD].emit = umod_emit_cpu;
2639    bld_base->op_actions[TGSI_OPCODE_USEQ].emit = useq_emit_cpu;
2640    bld_base->op_actions[TGSI_OPCODE_USGE].emit = usge_emit_cpu;
2641    bld_base->op_actions[TGSI_OPCODE_USHR].emit = ushr_emit_cpu;
2642    bld_base->op_actions[TGSI_OPCODE_USLT].emit = uslt_emit_cpu;
2643    bld_base->op_actions[TGSI_OPCODE_USNE].emit = usne_emit_cpu;
2644
2645    bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
2646
2647    bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
2648    bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
2649    bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
2650    bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
2651    bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
2652    bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
2653
2654    bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
2655    bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
2656
2657    bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = i64abs_emit_cpu;
2658    bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = i64ssg_emit_cpu;
2659    bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = i64neg_emit_cpu;
2660
2661    bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = u64seq_emit_cpu;
2662    bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = u64sne_emit_cpu;
2663    bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = u64slt_emit_cpu;
2664    bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = u64sge_emit_cpu;
2665    bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = i64slt_emit_cpu;
2666    bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = i64sge_emit_cpu;
2667
2668    bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = u64min_emit_cpu;
2669    bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = u64max_emit_cpu;
2670    bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = i64min_emit_cpu;
2671    bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = i64max_emit_cpu;
2672
2673    bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = u64add_emit_cpu;
2674    bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = u64mod_emit_cpu;
2675    bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = i64mod_emit_cpu;
2676    bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = u64div_emit_cpu;
2677    bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = i64div_emit_cpu;
2678
2679    bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = u64shl_emit_cpu;
2680    bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = i64shr_emit_cpu;
2681    bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = u64shr_emit_cpu;
2682 }