src/gallium/drivers/llvmpipe/lp_state_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "util/u_math.h"
  30 #include "util/u_memory.h"
  31 #include "util/u_simple_list.h"
  32 #include "os/os_time.h"
  33 #include "gallivm/lp_bld_arit.h"
  34 #include "gallivm/lp_bld_bitarit.h"
  35 #include "gallivm/lp_bld_const.h"
  36 #include "gallivm/lp_bld_debug.h"
  37 #include "gallivm/lp_bld_init.h"
  38 #include "gallivm/lp_bld_logic.h"
  39 #include "gallivm/lp_bld_intr.h"
  40 #include "gallivm/lp_bld_flow.h"
  41 #include "gallivm/lp_bld_type.h"
  42
  43 #include "lp_perf.h"
  44 #include "lp_debug.h"
  45 #include "lp_flush.h"
  46 #include "lp_screen.h"
  47 #include "lp_context.h"
  48 #include "lp_state.h"
  49 #include "lp_state_fs.h"
  50 #include "lp_state_setup.h"
  51
  52
  53
  54 /* currently organized to interpolate full float[4] attributes even
  55  * when some elements are unused.  Later, can pack vertex data more
  56  * closely.
  57  */
  58
  59
  60 struct lp_setup_args
  61 {
  62    /* Function arguments:
  63     */
  64    LLVMValueRef v0;
  65    LLVMValueRef v1;
  66    LLVMValueRef v2;
  67    LLVMValueRef facing;         /* boolean */
  68    LLVMValueRef a0;
  69    LLVMValueRef dadx;
  70    LLVMValueRef dady;
  71
  72    /* Derived:
  73     */
  74    LLVMValueRef x0_center;
  75    LLVMValueRef y0_center;
  76    LLVMValueRef dy20_ooa;
  77    LLVMValueRef dy01_ooa;
  78    LLVMValueRef dx20_ooa;
  79    LLVMValueRef dx01_ooa;
  80 };
  81
  82
  83
  84 static LLVMTypeRef
  85 type4f(struct gallivm_state *gallivm)
  86 {
  87    return LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
  88 }
  89
  90
  91 /* Equivalent of _mm_setr_ps(a,b,c,d)
  92  */
  93 static LLVMValueRef
  94 vec4f(struct gallivm_state *gallivm,
  95       LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
  96       const char *name)
  97 {
  98    LLVMBuilderRef bld = gallivm->builder;
  99    LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
 100    LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
 101    LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
 102    LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
 103
 104    LLVMValueRef res = LLVMGetUndef(type4f(gallivm));
 105
 106    res = LLVMBuildInsertElement(bld, res, a, i0, "");
 107    res = LLVMBuildInsertElement(bld, res, b, i1, "");
 108    res = LLVMBuildInsertElement(bld, res, c, i2, "");
 109    res = LLVMBuildInsertElement(bld, res, d, i3, name);
 110
 111    return res;
 112 }
 113
 114 /* Equivalent of _mm_set1_ps(a)
 115  */
 116 static LLVMValueRef
 117 vec4f_from_scalar(struct gallivm_state *gallivm,
 118                   LLVMValueRef a,
 119                   const char *name)
 120 {
 121    LLVMBuilderRef bld = gallivm->builder;
 122    LLVMValueRef res = LLVMGetUndef(type4f(gallivm));
 123    int i;
 124
 125    for(i = 0; i < 4; ++i) {
 126       LLVMValueRef index = lp_build_const_int32(gallivm, i);
 127       res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
 128    }
 129
 130    return res;
 131 }
 132
 133 static void
 134 store_coef(struct gallivm_state *gallivm,
 135            struct lp_setup_args *args,
 136            unsigned slot,
 137            LLVMValueRef a0,
 138            LLVMValueRef dadx,
 139            LLVMValueRef dady)
 140 {
 141    LLVMBuilderRef builder = gallivm->builder;
 142    LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
 143
 144    LLVMBuildStore(builder,
 145                   a0,
 146                   LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
 147
 148    LLVMBuildStore(builder,
 149                   dadx,
 150                   LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
 151
 152    LLVMBuildStore(builder,
 153                   dady,
 154                   LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
 155 }
 156
 157
 158
 159 static void
 160 emit_constant_coef4(struct gallivm_state *gallivm,
 161                      struct lp_setup_args *args,
 162                      unsigned slot,
 163                      LLVMValueRef vert)
 164 {
 165    LLVMValueRef zero      = lp_build_const_float(gallivm, 0.0);
 166    LLVMValueRef zerovec   = vec4f_from_scalar(gallivm, zero, "zero");
 167    store_coef(gallivm, args, slot, vert, zerovec, zerovec);
 168 }
 169
 170
 171
 172 /**
 173  * Setup the fragment input attribute with the front-facing value.
 174  * \param frontface  is the triangle front facing?
 175  */
 176 static void
 177 emit_facing_coef(struct gallivm_state *gallivm,
 178                   struct lp_setup_args *args,
 179                   unsigned slot )
 180 {
 181    LLVMBuilderRef builder = gallivm->builder;
 182    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
 183    LLVMValueRef a0_0 = args->facing;
 184    LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
 185    LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
 186    /* Our face val is either 1 or 0 so we do
 187     * face = (val * 2) - 1
 188     * to make it 1 or -1
 189     */
 190    LLVMValueRef face_val =
 191       LLVMBuildFAdd(builder,
 192                     LLVMBuildFMul(builder, a0_0f,
 193                                   lp_build_const_float(gallivm, 2.0),
 194                                   ""),
 195                     lp_build_const_float(gallivm, -1.0),
 196                     "");
 197    LLVMValueRef a0 = vec4f(gallivm, face_val, zero, zero, zero, "facing");
 198    LLVMValueRef zerovec = vec4f_from_scalar(gallivm, zero, "zero");
 199
 200    store_coef(gallivm, args, slot, a0, zerovec, zerovec);
 201 }
 202
 203
 204 static LLVMValueRef
 205 vert_attrib(struct gallivm_state *gallivm,
 206             LLVMValueRef vert,
 207             int attr,
 208             int elem,
 209             const char *name)
 210 {
 211    LLVMBuilderRef b = gallivm->builder;
 212    LLVMValueRef idx[2];
 213    idx[0] = lp_build_const_int32(gallivm, attr);
 214    idx[1] = lp_build_const_int32(gallivm, elem);
 215    return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
 216 }
 217
 218
 219 static void
 220 lp_twoside(struct gallivm_state *gallivm,
 221            struct lp_setup_args *args,
 222            const struct lp_setup_variant_key *key,
 223            int bcolor_slot,
 224            LLVMValueRef attribv[3])
 225 {
 226    LLVMBuilderRef b = gallivm->builder;
 227    LLVMValueRef a0_back, a1_back, a2_back;
 228    LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
 229
 230    LLVMValueRef facing = args->facing;
 231    LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
 232
 233    a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
 234    a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
 235    a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
 236
 237    /* Possibly swap the front and back attrib values,
 238     *
 239     * Prefer select to if so we don't have to worry about phis or
 240     * allocas.
 241     */
 242    attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
 243    attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
 244    attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
 245
 246 }
 247
 248 static void
 249 lp_do_offset_tri(struct gallivm_state *gallivm,
 250                  struct lp_setup_args *args,
 251                  const struct lp_setup_variant_key *key,
 252                  LLVMValueRef inv_det,
 253                  LLVMValueRef dxyz01,
 254                  LLVMValueRef dxyz20,
 255                  LLVMValueRef attribv[3])
 256 {
 257    LLVMBuilderRef b = gallivm->builder;
 258    struct lp_build_context bld;
 259    struct lp_build_context flt_scalar_bld;
 260    struct lp_build_context int_scalar_bld;
 261    LLVMValueRef zoffset, mult;
 262    LLVMValueRef z0_new, z1_new, z2_new;
 263    LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
 264    LLVMValueRef z0z1, z0z1z2;
 265    LLVMValueRef max, max_value, res12;
 266    LLVMValueRef shuffles[4];
 267    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
 268    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
 269    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
 270    LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
 271    LLVMValueRef threei  = lp_build_const_int32(gallivm, 3);
 272
 273    /* (res12) = cross(e,f).xy */
 274    shuffles[0] = twoi;
 275    shuffles[1] = zeroi;
 276    shuffles[2] = onei;
 277    shuffles[3] = twoi;
 278    dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
 279
 280    shuffles[0] = onei;
 281    shuffles[1] = twoi;
 282    shuffles[2] = twoi;
 283    shuffles[3] = zeroi;
 284    dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
 285
 286    dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
 287
 288    shuffles[0] = twoi;
 289    shuffles[1] = threei;
 290    shuffles[2] = LLVMGetUndef(shuf_type);
 291    shuffles[3] = LLVMGetUndef(shuf_type);
 292    dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
 293                                         LLVMConstVector(shuffles, 4), "");
 294
 295    res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
 296
 297    /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
 298    lp_build_context_init(&bld, gallivm, lp_type_float_vec(32, 128));
 299    dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
 300    dzdxdzdy = lp_build_abs(&bld, dzdxdzdy);
 301
 302    dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
 303    dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
 304
 305    /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
 306    max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
 307    max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
 308
 309    mult = LLVMBuildFMul(b, max_value,
 310                         lp_build_const_float(gallivm, key->pgon_offset_scale), "");
 311
 312    lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));
 313
 314    if (key->floating_point_depth) {
 315       /*
 316        * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) +
 317        *           MAX2(dzdx, dzdy) * pgon_offset_scale
 318        *
 319        * NOTE: Assumes IEEE float32.
 320        */
 321       LLVMValueRef c23_shifted, exp_mask, bias, exp;
 322       LLVMValueRef maxz_value, maxz0z1_value;
 323
 324       lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));
 325
 326       c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
 327       exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
 328
 329       maxz0z1_value = lp_build_max(&flt_scalar_bld,
 330                          LLVMBuildExtractElement(b, attribv[0], twoi, ""),
 331                          LLVMBuildExtractElement(b, attribv[1], twoi, ""));
 332
 333       maxz_value = lp_build_max(&flt_scalar_bld,
 334                       LLVMBuildExtractElement(b, attribv[2], twoi, ""),
 335                       maxz0z1_value);
 336
 337       exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
 338       exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
 339       exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
 340       /* Clamping to zero means mrd will be zero for very small numbers,
 341        * but specs do not indicate this should be prevented by clamping
 342        * mrd to smallest normal number instead. */
 343       exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
 344       exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");
 345
 346       bias = LLVMBuildFMul(b, exp,
 347                            lp_build_const_float(gallivm, key->pgon_offset_units),
 348                            "bias");
 349
 350       zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
 351    } else {
 352       /*
 353        * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
 354        */
 355       zoffset = LLVMBuildFAdd(b,
 356                               lp_build_const_float(gallivm, key->pgon_offset_units),
 357                               mult, "zoffset");
 358    }
 359
 360    if (key->pgon_offset_clamp > 0) {
 361       zoffset = lp_build_min(&flt_scalar_bld,
 362                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
 363                              zoffset);
 364    }
 365    else if (key->pgon_offset_clamp < 0) {
 366       zoffset = lp_build_max(&flt_scalar_bld,
 367                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
 368                              zoffset);
 369    }
 370
 371    /* yuck */
 372    shuffles[0] = twoi;
 373    shuffles[1] = lp_build_const_int32(gallivm, 6);
 374    shuffles[2] = LLVMGetUndef(shuf_type);
 375    shuffles[3] = LLVMGetUndef(shuf_type);
 376    z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
 377    shuffles[0] = zeroi;
 378    shuffles[1] = onei;
 379    shuffles[2] = lp_build_const_int32(gallivm, 6);
 380    shuffles[3] = LLVMGetUndef(shuf_type);
 381    z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
 382    zoffset = vec4f_from_scalar(gallivm, zoffset, "");
 383
 384    /* clamp and do offset */
 385    /*
 386     * FIXME I suspect the clamp (is that even right to always clamp to fixed
 387     * 0.0/1.0?) should really be per fragment?
 388     */
 389    z0z1z2 = lp_build_clamp(&bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld.zero, bld.one);
 390
 391    /* insert into args->a0.z, a1.z, a2.z:
 392     */
 393    z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
 394    z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
 395    z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
 396    attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
 397    attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
 398    attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
 399 }
 400
 401 static void
 402 load_attribute(struct gallivm_state *gallivm,
 403                struct lp_setup_args *args,
 404                const struct lp_setup_variant_key *key,
 405                unsigned vert_attr,
 406                LLVMValueRef attribv[3])
 407 {
 408    LLVMBuilderRef b = gallivm->builder;
 409    LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
 410
 411    /* Load the vertex data
 412     */
 413    attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
 414    attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
 415    attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
 416
 417
 418    /* Potentially modify it according to twoside, etc:
 419     */
 420    if (key->twoside) {
 421       if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
 422          lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
 423       else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
 424          lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
 425    }
 426 }
 427
 428 static void
 429 emit_coef4( struct gallivm_state *gallivm,
 430             struct lp_setup_args *args,
 431             unsigned slot,
 432             LLVMValueRef a0,
 433             LLVMValueRef a1,
 434             LLVMValueRef a2)
 435 {
 436    LLVMBuilderRef b = gallivm->builder;
 437    LLVMValueRef dy20_ooa = args->dy20_ooa;
 438    LLVMValueRef dy01_ooa = args->dy01_ooa;
 439    LLVMValueRef dx20_ooa = args->dx20_ooa;
 440    LLVMValueRef dx01_ooa = args->dx01_ooa;
 441    LLVMValueRef x0_center = args->x0_center;
 442    LLVMValueRef y0_center = args->y0_center;
 443
 444    LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
 445    LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
 446
 447    /* Calculate dadx (vec4f)
 448     */
 449    LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
 450    LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
 451    LLVMValueRef dadx          = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
 452
 453    /* Calculate dady (vec4f)
 454     */
 455    LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
 456    LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
 457    LLVMValueRef dady          = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
 458
 459    /* Calculate a0 - the attribute value at the origin
 460     */
 461    LLVMValueRef dadx_x0       = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
 462    LLVMValueRef dady_y0       = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
 463    LLVMValueRef attr_v0       = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
 464    LLVMValueRef attr_0        = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
 465
 466    store_coef(gallivm, args, slot, attr_0, dadx, dady);
 467 }
 468
 469
 470 static void
 471 emit_linear_coef( struct gallivm_state *gallivm,
 472                   struct lp_setup_args *args,
 473                   unsigned slot,
 474                   LLVMValueRef attribv[3])
 475 {
 476    /* nothing to do anymore */
 477    emit_coef4(gallivm,
 478               args, slot,
 479               attribv[0],
 480               attribv[1],
 481               attribv[2]);
 482 }
 483
 484
 485 /**
 486  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 487  * for a triangle.
 488  * We basically multiply the vertex value by 1/w before computing
 489  * the plane coefficients (a0, dadx, dady).
 490  * Later, when we compute the value at a particular fragment position we'll
 491  * divide the interpolated value by the interpolated W at that fragment.
 492  */
 493 static void
 494 apply_perspective_corr( struct gallivm_state *gallivm,
 495                         struct lp_setup_args *args,
 496                         unsigned slot,
 497                         LLVMValueRef attribv[3])
 498 {
 499    LLVMBuilderRef b = gallivm->builder;
 500
 501    /* premultiply by 1/w  (v[0][3] is always 1/w):
 502     */
 503    LLVMValueRef v0_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v0, 0, 3, ""), "v0_oow");
 504    LLVMValueRef v1_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v1, 0, 3, ""), "v1_oow");
 505    LLVMValueRef v2_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v2, 0, 3, ""), "v2_oow");
 506
 507    attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
 508    attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
 509    attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
 510 }
 511
 512
 513 static void
 514 emit_position_coef( struct gallivm_state *gallivm,
 515                     struct lp_setup_args *args,
 516                     int slot,
 517                     LLVMValueRef attribv[3])
 518 {
 519    emit_linear_coef(gallivm, args, slot, attribv);
 520 }
 521
 522
 523 /**
 524  * Applys cylindrical wrapping to vertex attributes if enabled.
 525  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
 526  *
 527  * @param cyl_wrap  TGSI_CYLINDRICAL_WRAP_x flags
 528  */
 529 static void
 530 emit_apply_cyl_wrap(struct gallivm_state *gallivm,
 531                     struct lp_setup_args *args,
 532                     uint cyl_wrap,
 533                     LLVMValueRef attribv[3])
 534
 535 {
 536    LLVMBuilderRef builder = gallivm->builder;
 537    struct lp_type type = lp_float32_vec4_type();
 538    LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, type);
 539    LLVMValueRef pos_half;
 540    LLVMValueRef neg_half;
 541    LLVMValueRef cyl_mask;
 542    LLVMValueRef offset;
 543    LLVMValueRef delta;
 544    LLVMValueRef one;
 545
 546    if (!cyl_wrap)
 547       return;
 548
 549    /* Constants */
 550    pos_half = lp_build_const_vec(gallivm, type, +0.5f);
 551    neg_half = lp_build_const_vec(gallivm, type, -0.5f);
 552    cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4);
 553
 554    one = lp_build_const_vec(gallivm, type, 1.0f);
 555    one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), "");
 556    one = LLVMBuildAnd(builder, one, cyl_mask, "");
 557
 558    /* Edge v0 -> v1 */
 559    delta = LLVMBuildFSub(builder, attribv[1], attribv[0], "");
 560
 561    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
 562    offset     = LLVMBuildAnd(builder, offset, one, "");
 563    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 564    attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
 565
 566    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
 567    offset     = LLVMBuildAnd(builder, offset, one, "");
 568    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 569    attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
 570
 571    /* Edge v1 -> v2 */
 572    delta = LLVMBuildFSub(builder, attribv[2], attribv[1], "");
 573
 574    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
 575    offset     = LLVMBuildAnd(builder, offset, one, "");
 576    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 577    attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
 578
 579    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
 580    offset     = LLVMBuildAnd(builder, offset, one, "");
 581    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 582    attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
 583
 584    /* Edge v2 -> v0 */
 585    delta = LLVMBuildFSub(builder, attribv[0], attribv[2], "");
 586
 587    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
 588    offset     = LLVMBuildAnd(builder, offset, one, "");
 589    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 590    attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
 591
 592    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
 593    offset     = LLVMBuildAnd(builder, offset, one, "");
 594    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
 595    attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
 596 }
 597
 598
 599 /**
 600  * Compute the inputs-> dadx, dady, a0 values.
 601  */
 602 static void
 603 emit_tri_coef( struct gallivm_state *gallivm,
 604                const struct lp_setup_variant_key *key,
 605                struct lp_setup_args *args)
 606 {
 607    unsigned slot;
 608
 609    LLVMValueRef attribs[3];
 610
 611   /* setup interpolation for all the remaining attributes:
 612     */
 613    for (slot = 0; slot < key->num_inputs; slot++) {
 614       switch (key->inputs[slot].interp) {
 615       case LP_INTERP_CONSTANT:
 616          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
 617          if (key->flatshade_first) {
 618             emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
 619          }
 620          else {
 621             emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
 622          }
 623          break;
 624
 625       case LP_INTERP_LINEAR:
 626          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
 627          emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
 628          emit_linear_coef(gallivm, args, slot+1, attribs);
 629          break;
 630
 631       case LP_INTERP_PERSPECTIVE:
 632          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
 633          emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
 634          apply_perspective_corr(gallivm, args, slot+1, attribs);
 635          emit_linear_coef(gallivm, args, slot+1, attribs);
 636          break;
 637
 638       case LP_INTERP_POSITION:
 639          /*
 640           * The generated pixel interpolators will pick up the coeffs from
 641           * slot 0.
 642           */
 643          break;
 644
 645       case LP_INTERP_FACING:
 646          emit_facing_coef(gallivm, args, slot+1);
 647          break;
 648
 649       default:
 650          assert(0);
 651       }
 652    }
 653 }
 654
 655
 656 /* XXX: generic code:
 657  */
 658 static void
 659 set_noalias(LLVMBuilderRef builder,
 660             LLVMValueRef function,
 661             const LLVMTypeRef *arg_types,
 662             int nr_args)
 663 {
 664    int i;
 665    for(i = 0; i < nr_args; ++i)
 666       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
 667          LLVMAddAttribute(LLVMGetParam(function, i),
 668                           LLVMNoAliasAttribute);
 669 }
 670
 671 static void
 672 init_args(struct gallivm_state *gallivm,
 673           const struct lp_setup_variant_key *key,
 674           struct lp_setup_args *args)
 675 {
 676    LLVMBuilderRef b = gallivm->builder;
 677    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
 678    LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
 679    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
 680    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
 681    LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
 682    LLVMValueRef e, f, ef, ooa;
 683    LLVMValueRef shuffles[4];
 684    LLVMValueRef attr_pos[3];
 685    struct lp_type typef4 = lp_type_float_vec(32, 128);
 686
 687    /* The internal position input is in slot zero:
 688     */
 689    load_attribute(gallivm, args, key, 0, attr_pos);
 690
 691    pixel_center = lp_build_const_vec(gallivm, typef4,
 692                                   key->pixel_center_half ? 0.5 : 0.0);
 693
 694    /*
 695     * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
 696     * also offset_tri uses actually xyz in them
 697     */
 698    xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
 699
 700    dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
 701    dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
 702
 703    shuffles[0] = onei;
 704    shuffles[1] = zeroi;
 705    shuffles[2] = LLVMGetUndef(shuf_type);
 706    shuffles[3] = LLVMGetUndef(shuf_type);
 707
 708    dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, LLVMConstVector(shuffles, 4), "");
 709
 710    ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
 711    e = LLVMBuildExtractElement(b, ef, zeroi, "");
 712    f = LLVMBuildExtractElement(b, ef, onei, "");
 713
 714    ooa  = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
 715
 716    ooa = vec4f_from_scalar(gallivm, ooa, "");
 717
 718    /* tri offset calc shares a lot of arithmetic, do it here */
 719    if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
 720       lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
 721    }
 722
 723    dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
 724    dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
 725
 726    args->dy20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
 727    args->dy01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
 728
 729    args->dx20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
 730    args->dx01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
 731
 732    args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
 733    args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
 734
 735    /* might want to merge that with other coef emit in the future */
 736    emit_position_coef(gallivm, args, 0, attr_pos);
 737 }
 738
 739 /**
 740  * Generate the runtime callable function for the coefficient calculation.
 741  *
 742  */
 743 static struct lp_setup_variant *
 744 generate_setup_variant(struct lp_setup_variant_key *key,
 745                        struct llvmpipe_context *lp)
 746 {
 747    struct lp_setup_variant *variant = NULL;
 748    struct gallivm_state *gallivm;
 749    struct lp_setup_args args;
 750    char func_name[256];
 751    LLVMTypeRef vec4f_type;
 752    LLVMTypeRef func_type;
 753    LLVMTypeRef arg_types[7];
 754    LLVMBasicBlockRef block;
 755    LLVMBuilderRef builder;
 756    int64_t t0 = 0, t1;
 757
 758    if (0)
 759       goto fail;
 760
 761    variant = CALLOC_STRUCT(lp_setup_variant);
 762    if (variant == NULL)
 763       goto fail;
 764
 765    variant->gallivm = gallivm = gallivm_create();
 766    if (!variant->gallivm) {
 767       goto fail;
 768    }
 769
 770    builder = gallivm->builder;
 771
 772    if (LP_DEBUG & DEBUG_COUNTERS) {
 773       t0 = os_time_get();
 774    }
 775
 776    memcpy(&variant->key, key, key->size);
 777    variant->list_item_global.base = variant;
 778
 779    util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
 780                  0,
 781                  variant->no);
 782
 783    /* Currently always deal with full 4-wide vertex attributes from
 784     * the vertices.
 785     */
 786
 787    vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
 788
 789    arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
 790    arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
 791    arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
 792    arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
 793    arg_types[4] = LLVMPointerType(vec4f_type, 0);       /* a0, aligned */
 794    arg_types[5] = LLVMPointerType(vec4f_type, 0);       /* dadx, aligned */
 795    arg_types[6] = LLVMPointerType(vec4f_type, 0);       /* dady, aligned */
 796
 797    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
 798                                 arg_types, Elements(arg_types), 0);
 799
 800    variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
 801    if (!variant->function)
 802       goto fail;
 803
 804    LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
 805
 806    args.v0       = LLVMGetParam(variant->function, 0);
 807    args.v1       = LLVMGetParam(variant->function, 1);
 808    args.v2       = LLVMGetParam(variant->function, 2);
 809    args.facing   = LLVMGetParam(variant->function, 3);
 810    args.a0       = LLVMGetParam(variant->function, 4);
 811    args.dadx     = LLVMGetParam(variant->function, 5);
 812    args.dady     = LLVMGetParam(variant->function, 6);
 813
 814    lp_build_name(args.v0, "in_v0");
 815    lp_build_name(args.v1, "in_v1");
 816    lp_build_name(args.v2, "in_v2");
 817    lp_build_name(args.facing, "in_facing");
 818    lp_build_name(args.a0, "out_a0");
 819    lp_build_name(args.dadx, "out_dadx");
 820    lp_build_name(args.dady, "out_dady");
 821
 822    /*
 823     * Function body
 824     */
 825    block = LLVMAppendBasicBlockInContext(gallivm->context,
 826                                          variant->function, "entry");
 827    LLVMPositionBuilderAtEnd(builder, block);
 828
 829    set_noalias(builder, variant->function, arg_types, Elements(arg_types));
 830    init_args(gallivm, &variant->key, &args);
 831    emit_tri_coef(gallivm, &variant->key, &args);
 832
 833    LLVMBuildRetVoid(builder);
 834
 835    gallivm_verify_function(gallivm, variant->function);
 836
 837    gallivm_compile_module(gallivm);
 838
 839    variant->jit_function = (lp_jit_setup_triangle)
 840       gallivm_jit_function(gallivm, variant->function);
 841    if (!variant->jit_function)
 842       goto fail;
 843
 844    /*
 845     * Update timing information:
 846     */
 847    if (LP_DEBUG & DEBUG_COUNTERS) {
 848       t1 = os_time_get();
 849       LP_COUNT_ADD(llvm_compile_time, t1 - t0);
 850       LP_COUNT_ADD(nr_llvm_compiles, 1);
 851    }
 852
 853    return variant;
 854
 855 fail:
 856    if (variant) {
 857       if (variant->function) {
 858          gallivm_free_function(gallivm,
 859                                variant->function,
 860                                variant->jit_function);
 861       }
 862       if (variant->gallivm) {
 863          gallivm_destroy(variant->gallivm);
 864       }
 865       FREE(variant);
 866    }
 867
 868    return NULL;
 869 }
 870
 871
 872
 873 static void
 874 lp_make_setup_variant_key(struct llvmpipe_context *lp,
 875                           struct lp_setup_variant_key *key)
 876 {
 877    struct lp_fragment_shader *fs = lp->fs;
 878    unsigned i;
 879
 880    assert(sizeof key->inputs[0] == sizeof(uint));
 881
 882    key->num_inputs = fs->info.base.num_inputs;
 883    key->flatshade_first = lp->rasterizer->flatshade_first;
 884    key->pixel_center_half = lp->rasterizer->half_pixel_center;
 885    key->twoside = lp->rasterizer->light_twoside;
 886    key->size = Offset(struct lp_setup_variant_key,
 887                       inputs[key->num_inputs]);
 888
 889    key->color_slot  = lp->color_slot [0];
 890    key->bcolor_slot = lp->bcolor_slot[0];
 891    key->spec_slot   = lp->color_slot [1];
 892    key->bspec_slot  = lp->bcolor_slot[1];
 893    assert(key->color_slot  == lp->color_slot [0]);
 894    assert(key->bcolor_slot == lp->bcolor_slot[0]);
 895    assert(key->spec_slot   == lp->color_slot [1]);
 896    assert(key->bspec_slot  == lp->bcolor_slot[1]);
 897
 898    /*
 899     * If depth is floating point, depth bias is calculated with respect
 900     * to the primitive's maximum Z value. Retain the original depth bias
 901     * value until that stage.
 902     */
 903    key->floating_point_depth = lp->floating_point_depth;
 904
 905    if (key->floating_point_depth) {
 906       key->pgon_offset_units = (float) lp->rasterizer->offset_units;
 907    } else {
 908       key->pgon_offset_units =
 909          (float) (lp->rasterizer->offset_units * lp->mrd);
 910    }
 911
 912    key->pgon_offset_scale = lp->rasterizer->offset_scale;
 913    key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
 914    key->pad = 0;
 915    memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
 916    for (i = 0; i < key->num_inputs; i++) {
 917       if (key->inputs[i].interp == LP_INTERP_COLOR) {
 918          if (lp->rasterizer->flatshade)
 919             key->inputs[i].interp = LP_INTERP_CONSTANT;
 920          else
 921             key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
 922       }
 923    }
 924
 925 }
 926
 927
 928 static void
 929 remove_setup_variant(struct llvmpipe_context *lp,
 930                      struct lp_setup_variant *variant)
 931 {
 932    if (gallivm_debug & GALLIVM_DEBUG_IR) {
 933       debug_printf("llvmpipe: del setup_variant #%u total %u\n",
 934                    variant->no, lp->nr_setup_variants);
 935    }
 936
 937    if (variant->function) {
 938       gallivm_free_function(variant->gallivm,
 939                             variant->function,
 940                             variant->jit_function);
 941    }
 942
 943    if (variant->gallivm) {
 944       gallivm_destroy(variant->gallivm);
 945    }
 946
 947    remove_from_list(&variant->list_item_global);
 948    lp->nr_setup_variants--;
 949    FREE(variant);
 950 }
 951
 952
 953
 954 /* When the number of setup variants exceeds a threshold, cull a
 955  * fraction (currently a quarter) of them.
 956  */
 957 static void
 958 cull_setup_variants(struct llvmpipe_context *lp)
 959 {
 960    struct pipe_context *pipe = &lp->pipe;
 961    int i;
 962
 963    /*
 964     * XXX: we need to flush the context until we have some sort of reference
 965     * counting in fragment shaders as they may still be binned
 966     * Flushing alone might not be sufficient we need to wait on it too.
 967     */
 968    llvmpipe_finish(pipe, __FUNCTION__);
 969
 970    for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
 971       struct lp_setup_variant_list_item *item;
 972       if (is_empty_list(&lp->setup_variants_list)) {
 973          break;
 974       }
 975       item = last_elem(&lp->setup_variants_list);
 976       assert(item);
 977       assert(item->base);
 978       remove_setup_variant(lp, item->base);
 979    }
 980 }
 981
 982
 983 /**
 984  * Update fragment/vertex shader linkage state.  This is called just
 985  * prior to drawing something when some fragment-related state has
 986  * changed.
 987  */
 988 void
 989 llvmpipe_update_setup(struct llvmpipe_context *lp)
 990 {
 991    struct lp_setup_variant_key *key = &lp->setup_variant.key;
 992    struct lp_setup_variant *variant = NULL;
 993    struct lp_setup_variant_list_item *li;
 994
 995    lp_make_setup_variant_key(lp, key);
 996
 997    foreach(li, &lp->setup_variants_list) {
 998       if(li->base->key.size == key->size &&
 999          memcmp(&li->base->key, key, key->size) == 0) {
1000          variant = li->base;
1001          break;
1002       }
1003    }
1004
1005    if (variant) {
1006       move_to_head(&lp->setup_variants_list, &variant->list_item_global);
1007    }
1008    else {
1009       if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
1010          cull_setup_variants(lp);
1011       }
1012
1013       variant = generate_setup_variant(key, lp);
1014       if (variant) {
1015          insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
1016          lp->nr_setup_variants++;
1017          llvmpipe_variant_count++;
1018       }
1019    }
1020
1021    lp_setup_set_setup_variant(lp->setup,
1022                               variant);
1023 }
1024
1025 void
1026 lp_delete_setup_variants(struct llvmpipe_context *lp)
1027 {
1028    struct lp_setup_variant_list_item *li;
1029    li = first_elem(&lp->setup_variants_list);
1030    while(!at_end(&lp->setup_variants_list, li)) {
1031       struct lp_setup_variant_list_item *next = next_elem(li);
1032       remove_setup_variant(lp, li->base);
1033       li = next;
1034    }
1035 }
1036
1037 void
1038 lp_dump_setup_coef( const struct lp_setup_variant_key *key,
1039                     const float (*sa0)[4],
1040                     const float (*sdadx)[4],
1041                     const float (*sdady)[4])
1042 {
1043    int i, slot;
1044
1045    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1046       float a0   = sa0  [0][i];
1047       float dadx = sdadx[0][i];
1048       float dady = sdady[0][i];
1049
1050       debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
1051                    "xyzw"[i],
1052                    a0, dadx, dady);
1053    }
1054
1055    for (slot = 0; slot < key->num_inputs; slot++) {
1056       unsigned usage_mask = key->inputs[slot].usage_mask;
1057       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1058          if (usage_mask & (1 << i)) {
1059             float a0   = sa0  [1 + slot][i];
1060             float dadx = sdadx[1 + slot][i];
1061             float dady = sdady[1 + slot][i];
1062
1063             debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
1064                          slot,
1065                          "xyzw"[i],
1066                          a0, dadx, dady);
1067          }
1068       }
1069    }
1070 }