src/gallium/drivers/llvmpipe/lp_state_setup.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "util/u_math.h"
  30 #include "util/u_memory.h"
  31 #include "util/u_simple_list.h"
  32 #include "os/os_time.h"
  33 #include "gallivm/lp_bld_debug.h"
  34 #include "gallivm/lp_bld_init.h"
  35 #include "gallivm/lp_bld_intr.h"
  36 #include "gallivm/lp_bld_flow.h"
  37 #include <llvm-c/Analysis.h>    /* for LLVMVerifyFunction */
  38
  39 #include "lp_perf.h"
  40 #include "lp_debug.h"
  41 #include "lp_flush.h"
  42 #include "lp_screen.h"
  43 #include "lp_context.h"
  44 #include "lp_setup_context.h"
  45 #include "lp_rast.h"
  46 #include "lp_state.h"
  47 #include "lp_state_fs.h"
  48 #include "lp_state_setup.h"
  49
  50
  51
  52 /* currently organized to interpolate full float[4] attributes even
  53  * when some elements are unused.  Later, can pack vertex data more
  54  * closely.
  55  */
  56
  57
  58 struct lp_setup_args
  59 {
  60    /* Function arguments:
  61     */
  62    LLVMValueRef v0;
  63    LLVMValueRef v1;
  64    LLVMValueRef v2;
  65    LLVMValueRef facing;         /* boolean */
  66    LLVMValueRef a0;
  67    LLVMValueRef dadx;
  68    LLVMValueRef dady;
  69
  70    /* Derived:
  71     */
  72    LLVMValueRef x0_center;
  73    LLVMValueRef y0_center;
  74    LLVMValueRef dy20_ooa;
  75    LLVMValueRef dy01_ooa;
  76    LLVMValueRef dx20_ooa;
  77    LLVMValueRef dx01_ooa;
  78
  79    /* For twoside calcs
  80     */
  81    LLVMValueRef det;
  82    LLVMValueRef sign;
  83    LLVMValueRef bcolor_slot;
  84    LLVMValueRef color_slot;
  85
  86 };
  87
  88 static LLVMTypeRef type4f(void)
  89 {
  90    return LLVMVectorType(LLVMFloatType(), 4);
  91 }
  92
  93
  94 /* Equivalent of _mm_setr_ps(a,b,c,d)
  95  */
  96 static LLVMValueRef vec4f(LLVMBuilderRef bld,
  97                           LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
  98                           const char *name)
  99 {
 100    LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
 101    LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
 102    LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
 103    LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
 104
 105    LLVMValueRef res = LLVMGetUndef(type4f());
 106
 107    res = LLVMBuildInsertElement(bld, res, a, i0, "");
 108    res = LLVMBuildInsertElement(bld, res, b, i1, "");
 109    res = LLVMBuildInsertElement(bld, res, c, i2, "");
 110    res = LLVMBuildInsertElement(bld, res, d, i3, name);
 111
 112    return res;
 113 }
 114
 115 /* Equivalent of _mm_set1_ps(a)
 116  */
 117 static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
 118                                       LLVMValueRef a,
 119                                       const char *name)
 120 {
 121    LLVMValueRef res = LLVMGetUndef(type4f());
 122    int i;
 123
 124    for(i = 0; i < 4; ++i) {
 125       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 126       res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
 127    }
 128
 129    return res;
 130 }
 131
 132 static void
 133 store_coef(LLVMBuilderRef builder,
 134            struct lp_setup_args *args,
 135            unsigned slot,
 136            LLVMValueRef a0,
 137            LLVMValueRef dadx,
 138            LLVMValueRef dady)
 139 {
 140    LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
 141
 142    LLVMBuildStore(builder,
 143                   a0,
 144                   LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
 145
 146    LLVMBuildStore(builder,
 147                   dadx,
 148                   LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
 149
 150    LLVMBuildStore(builder,
 151                   dady,
 152                   LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
 153 }
 154
 155
 156
 157 static void
 158 emit_constant_coef4( LLVMBuilderRef builder,
 159                      struct lp_setup_args *args,
 160                      unsigned slot,
 161                      LLVMValueRef vert,
 162                      unsigned attr)
 163 {
 164    LLVMValueRef zero      = LLVMConstReal(LLVMFloatType(), 0.0);
 165    LLVMValueRef zerovec   = vec4f_from_scalar(builder, zero, "zero");
 166    LLVMValueRef idx       = LLVMConstInt(LLVMInt32Type(), attr, 0);
 167    LLVMValueRef attr_ptr  = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr");
 168    LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr");
 169
 170    store_coef(builder, args, slot, vert_attr, zerovec, zerovec);
 171 }
 172
 173
 174
 175 /**
 176  * Setup the fragment input attribute with the front-facing value.
 177  * \param frontface  is the triangle front facing?
 178  */
 179 static void
 180 emit_facing_coef( LLVMBuilderRef builder,
 181                   struct lp_setup_args *args,
 182                   unsigned slot )
 183 {
 184    LLVMValueRef a0_0 = args->facing;
 185    LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), "");
 186    LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
 187    LLVMValueRef a0      = vec4f(builder, a0_0f, zero, zero, zero, "facing");
 188    LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
 189
 190    store_coef(builder, args, slot, a0, zerovec, zerovec);
 191 }
 192
 193
 194 static LLVMValueRef
 195 vert_attrib(LLVMBuilderRef b,
 196             LLVMValueRef vert,
 197             int attr,
 198             int elem,
 199             const char *name)
 200 {
 201    LLVMValueRef idx[2];
 202    idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
 203    idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
 204    return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
 205 }
 206
 207
 208
 209 static void
 210 emit_coef4( LLVMBuilderRef b,
 211             struct lp_setup_args *args,
 212             unsigned slot,
 213             LLVMValueRef a0,
 214             LLVMValueRef a1,
 215             LLVMValueRef a2)
 216 {
 217    LLVMValueRef dy20_ooa = args->dy20_ooa;
 218    LLVMValueRef dy01_ooa = args->dy01_ooa;
 219    LLVMValueRef dx20_ooa = args->dx20_ooa;
 220    LLVMValueRef dx01_ooa = args->dx01_ooa;
 221    LLVMValueRef x0_center = args->x0_center;
 222    LLVMValueRef y0_center = args->y0_center;
 223
 224    /* XXX: using fsub, fmul on vector types -- does this work??
 225     */
 226    LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
 227    LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
 228
 229    /* Calculate dadx (vec4f)
 230     */
 231    LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
 232    LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
 233    LLVMValueRef dadx          = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
 234
 235    /* Calculate dady (vec4f)
 236     */
 237    LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
 238    LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
 239    LLVMValueRef dady          = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
 240
 241    /* Calculate a0 - the attribute value at the origin
 242     */
 243    LLVMValueRef dadx_x0       = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
 244    LLVMValueRef dady_y0       = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
 245    LLVMValueRef attr_v0       = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
 246    LLVMValueRef attr_0        = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
 247
 248    store_coef(b, args, slot, attr_0, dadx, dady);
 249 }
 250
 251
 252 static void
 253 emit_linear_coef( LLVMBuilderRef b,
 254                   struct lp_setup_args *args,
 255                   unsigned slot,
 256                   unsigned vert_attr)
 257 {
 258    LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
 259
 260    LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
 261    LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
 262    LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
 263
 264    emit_coef4(b, args, slot, a0, a1, a2);
 265 }
 266
 267
 268
 269 /**
 270  * Compute a0, dadx and dady for a perspective-corrected interpolant,
 271  * for a triangle.
 272  * We basically multiply the vertex value by 1/w before computing
 273  * the plane coefficients (a0, dadx, dady).
 274  * Later, when we compute the value at a particular fragment position we'll
 275  * divide the interpolated value by the interpolated W at that fragment.
 276  */
 277 static void
 278 emit_perspective_coef( LLVMBuilderRef b,
 279                        struct lp_setup_args *args,
 280                        unsigned slot,
 281                        unsigned vert_attr)
 282 {
 283    /* premultiply by 1/w  (v[0][3] is always 1/w):
 284     */
 285    LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
 286
 287    LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
 288    LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
 289    LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
 290
 291    LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
 292    LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
 293    LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
 294
 295    LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a");
 296    LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a");
 297    LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a");
 298
 299    emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
 300 }
 301
 302
 303 static void
 304 emit_position_coef( LLVMBuilderRef builder,
 305                     struct lp_setup_args *args,
 306                     int slot, int attrib )
 307 {
 308    emit_linear_coef(builder, args, slot, attrib);
 309 }
 310
 311
 312
 313
 314 /**
 315  * Compute the inputs-> dadx, dady, a0 values.
 316  */
 317 static void
 318 emit_tri_coef( LLVMBuilderRef builder,
 319                const struct lp_setup_variant_key *key,
 320                struct lp_setup_args *args )
 321 {
 322    unsigned slot;
 323
 324    /* The internal position input is in slot zero:
 325     */
 326    emit_position_coef(builder, args, 0, 0);
 327
 328    /* setup interpolation for all the remaining attributes:
 329     */
 330    for (slot = 0; slot < key->num_inputs; slot++) {
 331       unsigned vert_attr = key->inputs[slot].src_index;
 332
 333       switch (key->inputs[slot].interp) {
 334       case LP_INTERP_CONSTANT:
 335          if (key->flatshade_first) {
 336             emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr);
 337          }
 338          else {
 339             emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr);
 340          }
 341          break;
 342
 343       case LP_INTERP_LINEAR:
 344          emit_linear_coef(builder, args, slot+1, vert_attr);
 345          break;
 346
 347       case LP_INTERP_PERSPECTIVE:
 348          emit_perspective_coef(builder, args, slot+1, vert_attr);
 349          break;
 350
 351       case LP_INTERP_POSITION:
 352          /*
 353           * The generated pixel interpolators will pick up the coeffs from
 354           * slot 0.
 355           */
 356          break;
 357
 358       case LP_INTERP_FACING:
 359          emit_facing_coef(builder, args, slot+1);
 360          break;
 361
 362       default:
 363          assert(0);
 364       }
 365    }
 366 }
 367
 368
 369 /* XXX: This is generic code, share with fs/vs codegen:
 370  */
 371 static lp_jit_setup_triangle
 372 finalize_function(struct llvmpipe_screen *screen,
 373                   LLVMBuilderRef builder,
 374                   LLVMValueRef function)
 375 {
 376    void *f;
 377
 378    /* Verify the LLVM IR.  If invalid, dump and abort */
 379 #ifdef DEBUG
 380    if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
 381       if (1)
 382          lp_debug_dump_value(function);
 383       abort();
 384    }
 385 #endif
 386
 387    /* Apply optimizations to LLVM IR */
 388    LLVMRunFunctionPassManager(screen->pass, function);
 389
 390    if (gallivm_debug & GALLIVM_DEBUG_IR)
 391    {
 392       /* Print the LLVM IR to stderr */
 393       lp_debug_dump_value(function);
 394       debug_printf("\n");
 395    }
 396
 397    /*
 398     * Translate the LLVM IR into machine code.
 399     */
 400    f = LLVMGetPointerToGlobal(screen->engine, function);
 401
 402    if (gallivm_debug & GALLIVM_DEBUG_ASM)
 403    {
 404       lp_disassemble(f);
 405    }
 406
 407    lp_func_delete_body(function);
 408
 409    return f;
 410 }
 411
 412 /* XXX: Generic code:
 413  */
 414 static void
 415 lp_emit_emms(LLVMBuilderRef builder)
 416 {
 417 #ifdef PIPE_ARCH_X86
 418    /* Avoid corrupting the FPU stack on 32bit OSes. */
 419    lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
 420 #endif
 421 }
 422
 423
 424 /* XXX: generic code:
 425  */
 426 static void
 427 set_noalias(LLVMBuilderRef builder,
 428             LLVMValueRef function,
 429             const LLVMTypeRef *arg_types,
 430             int nr_args)
 431 {
 432    int i;
 433    for(i = 0; i < Elements(arg_types); ++i)
 434       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
 435          LLVMAddAttribute(LLVMGetParam(function, i),
 436                           LLVMNoAliasAttribute);
 437 }
 438
 439 static void
 440 init_args(LLVMBuilderRef b,
 441           struct lp_setup_args *args,
 442           const struct lp_setup_variant *variant)
 443 {
 444    LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
 445    LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
 446
 447    LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
 448    LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
 449
 450    LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
 451    LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
 452
 453    LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(),
 454                                              variant->key.pixel_center_half ? 0.5 : 0);
 455
 456    LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
 457    LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
 458
 459    LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
 460    LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
 461    LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
 462    LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
 463
 464    LLVMValueRef one  = LLVMConstReal(LLVMFloatType(), 1.0);
 465    LLVMValueRef e    = LLVMBuildFMul(b, dx01, dy20, "e");
 466    LLVMValueRef f    = LLVMBuildFMul(b, dx20, dy01, "f");
 467    LLVMValueRef ooa  = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
 468
 469    LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
 470    LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
 471    LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
 472    LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
 473
 474    args->dy20_ooa  = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
 475    args->dy01_ooa  = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
 476
 477    args->dx20_ooa  = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
 478    args->dx01_ooa  = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
 479
 480    args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
 481    args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
 482 }
 483
 484 static void
 485 set_args_attr(struct llvmpipe_context *lp,
 486            struct lp_setup_args *args)
 487 {
 488    args->color_slot = LLVMConstInt(LLVMInt32Type(), lp->color_slot, 0);
 489    args->bcolor_slot = LLVMConstInt(LLVMInt32Type(), lp->bcolor_slot, 0);
 490    args->sign =  LLVMConstReal(LLVMFloatType(), (lp->rasterizer->front_ccw ? -1.0f : 1.0f));
 491 }
 492
 493 static void
 494 lp_twoside(LLVMBuilderRef b,
 495            struct lp_setup_args *args,
 496            const struct lp_setup_variant_key *key)
 497 {
 498    struct lp_build_if_state if_state;
 499
 500    LLVMValueRef a0_old, a1_old, a2_old;
 501    LLVMValueRef a0_new, a1_new, a2_new;
 502
 503    LLVMValueRef idx1 = args->color_slot;
 504    LLVMValueRef idx2 = args->bcolor_slot;
 505
 506    LLVMValueRef facing = args->facing;
 507    LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for loop condition */
 508
 509 #if 0
 510 /*Probably can delete this, just tried to follow draw_pipe_twoside way of
 511   calculating det*/
 512    /* edge vectors: e = v0 - v2, f = v1 - v2 */
 513    LLVMValueRef e = LLVMBuildFSub(b, args->v0, args->v2, "e");
 514    LLVMValueRef f = LLVMBuildFSub(b, args->v1, args->v2, "f");
 515    LLVMValueRef dx02 = vert_attrib(b, e, 0, 0, "dx02");
 516    LLVMValueRef dy02 = vert_attrib(b, e, 0, 1, "dy02");
 517    LLVMValueRef dx12 = vert_attrib(b, f, 0, 0, "dx12");
 518    LLVMValueRef dy12 = vert_attrib(b, f, 0, 1, "dy12");
 519
 520    /* det = cross(e,f).z */
 521    LLVMValueRef dx02_dy12  = LLVMBuildFMul(b, dx02, dy12, "dx02_dy12");
 522    LLVMValueRef dy02_dx12  = LLVMBuildFMul(b, dy02, dx12, "dy02_dx12");
 523    LLVMValueRef det  = LLVMBuildFSub(b, dx02_dy12, dy02_dx12, "det");
 524    args->det = det;
 525    LLVMValueRef result = LLVMBuildFMul(b, det, args->sign, "dy02_dx12");
 526 #endif
 527
 528    lp_build_if(&if_state, b, front_facing);
 529    {
 530       /* swap the front and back attrib values */
 531       a0_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx1, 1, ""), "v0a");
 532       a1_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx1, 1, ""), "v1a");
 533       a2_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx1, 1, ""), "v2a");
 534
 535       a0_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a");
 536       a1_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a");
 537       a2_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a");
 538
 539       LLVMBuildStore(b, a0_new, LLVMBuildGEP(b, args->v0, &idx1, 1, ""));
 540       LLVMBuildStore(b, a1_new, LLVMBuildGEP(b, args->v1, &idx1, 1, ""));
 541       LLVMBuildStore(b, a2_new, LLVMBuildGEP(b, args->v2, &idx1, 1, ""));
 542    }
 543    lp_build_endif(&if_state);
 544
 545 }
 546
 547
 548 /**
 549  * Generate the runtime callable function for the coefficient calculation.
 550  *
 551  */
 552 static struct lp_setup_variant *
 553 generate_setup_variant(struct llvmpipe_screen *screen,
 554                        struct lp_setup_variant_key *key,
 555                        struct llvmpipe_context *lp)
 556 {
 557    struct lp_setup_variant *variant = NULL;
 558    struct lp_setup_args args;
 559    char func_name[256];
 560    LLVMTypeRef vec4f_type;
 561    LLVMTypeRef func_type;
 562    LLVMTypeRef arg_types[7];
 563    LLVMBasicBlockRef block;
 564    LLVMBuilderRef builder;
 565    int64_t t0, t1;
 566
 567    if (0)
 568       goto fail;
 569
 570    variant = CALLOC_STRUCT(lp_setup_variant);
 571    if (variant == NULL)
 572       goto fail;
 573
 574    if (LP_DEBUG & DEBUG_COUNTERS) {
 575       t0 = os_time_get();
 576    }
 577
 578    memcpy(&variant->key, key, key->size);
 579    variant->list_item_global.base = variant;
 580
 581    util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
 582                  0,
 583                  variant->no);
 584
 585    /* Currently always deal with full 4-wide vertex attributes from
 586     * the vertices.
 587     */
 588
 589    vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
 590
 591    arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
 592    arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
 593    arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
 594    arg_types[3] = LLVMInt32Type();                      /* facing */
 595    arg_types[4] = LLVMPointerType(vec4f_type, 0);       /* a0, aligned */
 596    arg_types[5] = LLVMPointerType(vec4f_type, 0);       /* dadx, aligned */
 597    arg_types[6] = LLVMPointerType(vec4f_type, 0);       /* dady, aligned */
 598
 599    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 600
 601    variant->function = LLVMAddFunction(screen->module, func_name, func_type);
 602    if (!variant->function)
 603       goto fail;
 604
 605    LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
 606
 607    args.v0       = LLVMGetParam(variant->function, 0);
 608    args.v1       = LLVMGetParam(variant->function, 1);
 609    args.v2       = LLVMGetParam(variant->function, 2);
 610    args.facing   = LLVMGetParam(variant->function, 3);
 611    args.a0       = LLVMGetParam(variant->function, 4);
 612    args.dadx     = LLVMGetParam(variant->function, 5);
 613    args.dady     = LLVMGetParam(variant->function, 6);
 614
 615    lp_build_name(args.v0, "in_v0");
 616    lp_build_name(args.v1, "in_v1");
 617    lp_build_name(args.v2, "in_v2");
 618    lp_build_name(args.facing, "in_facing");
 619    lp_build_name(args.a0, "out_a0");
 620    lp_build_name(args.dadx, "out_dadx");
 621    lp_build_name(args.dady, "out_dady");
 622
 623    /*
 624     * Function body
 625     */
 626    block = LLVMAppendBasicBlock(variant->function, "entry");
 627    builder = LLVMCreateBuilder();
 628    LLVMPositionBuilderAtEnd(builder, block);
 629
 630    set_noalias(builder, variant->function, arg_types, Elements(arg_types));
 631    init_args(builder, &args, variant);
 632    if (variant->key.twoside){
 633       set_args_attr(lp, &args);
 634       lp_twoside(builder, &args, &variant->key);
 635    }
 636    emit_tri_coef(builder, &variant->key, &args);
 637
 638    lp_emit_emms(builder);
 639    LLVMBuildRetVoid(builder);
 640    LLVMDisposeBuilder(builder);
 641
 642    variant->jit_function = finalize_function(screen, builder,
 643                                              variant->function);
 644    if (!variant->jit_function)
 645       goto fail;
 646
 647    /*
 648     * Update timing information:
 649     */
 650    if (LP_DEBUG & DEBUG_COUNTERS) {
 651       t1 = os_time_get();
 652       LP_COUNT_ADD(llvm_compile_time, t1 - t0);
 653       LP_COUNT_ADD(nr_llvm_compiles, 1);
 654    }
 655
 656    return variant;
 657
 658 fail:
 659    if (variant) {
 660       if (variant->function) {
 661          if (variant->jit_function)
 662             LLVMFreeMachineCodeForFunction(screen->engine,
 663                                            variant->function);
 664          LLVMDeleteFunction(variant->function);
 665       }
 666       FREE(variant);
 667    }
 668
 669    return NULL;
 670 }
 671
 672
 673
 674 static void
 675 lp_make_setup_variant_key(struct llvmpipe_context *lp,
 676                           struct lp_setup_variant_key *key)
 677 {
 678    struct lp_fragment_shader *fs = lp->fs;
 679    unsigned i;
 680
 681    assert(sizeof key->inputs[0] == sizeof(ushort));
 682
 683    key->num_inputs = fs->info.base.num_inputs;
 684    key->flatshade_first = lp->rasterizer->flatshade_first;
 685    key->pixel_center_half = lp->rasterizer->gl_rasterization_rules;
 686    key->twoside = lp->rasterizer->light_twoside;
 687    key->size = Offset(struct lp_setup_variant_key,
 688                       inputs[key->num_inputs]);
 689    key->pad = 0;
 690
 691    memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
 692    for (i = 0; i < key->num_inputs; i++) {
 693       if (key->inputs[i].interp == LP_INTERP_COLOR) {
 694          if (lp->rasterizer->flatshade)
 695             key->inputs[i].interp = LP_INTERP_CONSTANT;
 696          else
 697             key->inputs[i].interp = LP_INTERP_LINEAR;
 698       }
 699    }
 700
 701 }
 702
 703
 704 static void
 705 remove_setup_variant(struct llvmpipe_context *lp,
 706                      struct lp_setup_variant *variant)
 707 {
 708    struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
 709
 710    if (gallivm_debug & GALLIVM_DEBUG_IR) {
 711       debug_printf("llvmpipe: del setup_variant #%u total %u\n",
 712                    variant->no, lp->nr_setup_variants);
 713    }
 714
 715    if (variant->function) {
 716       if (variant->jit_function)
 717          LLVMFreeMachineCodeForFunction(screen->engine,
 718                                         variant->function);
 719       LLVMDeleteFunction(variant->function);
 720    }
 721
 722    remove_from_list(&variant->list_item_global);
 723    lp->nr_setup_variants--;
 724    FREE(variant);
 725 }
 726
 727
 728
 729 /* When the number of setup variants exceeds a threshold, cull a
 730  * fraction (currently a quarter) of them.
 731  */
 732 static void
 733 cull_setup_variants(struct llvmpipe_context *lp)
 734 {
 735    struct pipe_context *pipe = &lp->pipe;
 736    int i;
 737
 738    /*
 739     * XXX: we need to flush the context until we have some sort of reference
 740     * counting in fragment shaders as they may still be binned
 741     * Flushing alone might not be sufficient we need to wait on it too.
 742     */
 743    llvmpipe_finish(pipe, __FUNCTION__);
 744
 745    for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
 746       struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list);
 747       remove_setup_variant(lp, item->base);
 748    }
 749 }
 750
 751
 752 /**
 753  * Update fragment/vertex shader linkage state.  This is called just
 754  * prior to drawing something when some fragment-related state has
 755  * changed.
 756  */
 757 void
 758 llvmpipe_update_setup(struct llvmpipe_context *lp)
 759 {
 760    struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
 761
 762    struct lp_setup_variant_key *key = &lp->setup_variant.key;
 763    struct lp_setup_variant *variant = NULL;
 764    struct lp_setup_variant_list_item *li;
 765
 766    lp_make_setup_variant_key(lp, key);
 767
 768    foreach(li, &lp->setup_variants_list) {
 769       if(li->base->key.size == key->size &&
 770          memcmp(&li->base->key, key, key->size) == 0) {
 771          variant = li->base;
 772          break;
 773       }
 774    }
 775
 776    if (variant) {
 777       move_to_head(&lp->setup_variants_list, &variant->list_item_global);
 778    }
 779    else {
 780       if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
 781          cull_setup_variants(lp);
 782       }
 783
 784       variant = generate_setup_variant(screen, key, lp);
 785       insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
 786       lp->nr_setup_variants++;
 787    }
 788
 789    lp_setup_set_setup_variant(lp->setup,
 790                               variant);
 791 }
 792
 793 void
 794 lp_delete_setup_variants(struct llvmpipe_context *lp)
 795 {
 796    struct lp_setup_variant_list_item *li;
 797    li = first_elem(&lp->setup_variants_list);
 798    while(!at_end(&lp->setup_variants_list, li)) {
 799       struct lp_setup_variant_list_item *next = next_elem(li);
 800       remove_setup_variant(lp, li->base);
 801       li = next;
 802    }
 803 }
 804
 805 void
 806 lp_dump_setup_coef( const struct lp_setup_variant_key *key,
 807                     const float (*sa0)[4],
 808                     const float (*sdadx)[4],
 809                     const float (*sdady)[4])
 810 {
 811    int i, slot;
 812
 813    for (i = 0; i < NUM_CHANNELS; i++) {
 814       float a0   = sa0  [0][i];
 815       float dadx = sdadx[0][i];
 816       float dady = sdady[0][i];
 817
 818       debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
 819                    "xyzw"[i],
 820                    a0, dadx, dady);
 821    }
 822
 823    for (slot = 0; slot < key->num_inputs; slot++) {
 824       unsigned usage_mask = key->inputs[slot].usage_mask;
 825       for (i = 0; i < NUM_CHANNELS; i++) {
 826          if (usage_mask & (1 << i)) {
 827             float a0   = sa0  [1 + slot][i];
 828             float dadx = sdadx[1 + slot][i];
 829             float dady = sdady[1 + slot][i];
 830
 831             debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
 832                          slot,
 833                          "xyzw"[i],
 834                          a0, dadx, dady);
 835          }
 836       }
 837    }
 838 }