Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
index e3ef9e425fa6bc21fddf1fdb516acc02fdab40a1..8759c38cabbf840b496e8a149541a8bf26be44dc 100644 (file)
@@ -1,3 +1,30 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include "draw_llvm.h"
 
 #include "draw_context.h"
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_init.h"
 
 #include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "util/u_cpu_detect.h"
+#include "util/u_pointer.h"
 #include "util/u_string.h"
 
 #include <llvm-c/Transforms/Scalar.h>
 
 #define DEBUG_STORE 0
 
-
 /* generates the draw jit function */
 static void
 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
 
 static void
 init_globals(struct draw_llvm *llvm)
@@ -33,12 +64,19 @@ init_globals(struct draw_llvm *llvm)
 
    /* struct draw_jit_texture */
    {
-      LLVMTypeRef elem_types[4];
+      LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
 
       elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
       elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
-      elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
-      elem_types[DRAW_JIT_TEXTURE_DATA]   = LLVMPointerType(LLVMInt8Type(), 0);
+      elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
+      elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+      elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
+         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+      elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
+         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+      elem_types[DRAW_JIT_TEXTURE_DATA] =
+         LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+                       DRAW_MAX_TEXTURE_LEVELS);
 
       texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
@@ -48,9 +86,18 @@ init_globals(struct draw_llvm *llvm)
       LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
                              llvm->target, texture_type,
                              DRAW_JIT_TEXTURE_HEIGHT);
-      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_DEPTH);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_LAST_LEVEL);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
                              llvm->target, texture_type,
-                             DRAW_JIT_TEXTURE_STRIDE);
+                             DRAW_JIT_TEXTURE_ROW_STRIDE);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_IMG_STRIDE);
       LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
                              llvm->target, texture_type,
                              DRAW_JIT_TEXTURE_DATA);
@@ -68,7 +115,8 @@ init_globals(struct draw_llvm *llvm)
 
       elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
       elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
-      elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+      elem_types[2] = LLVMArrayType(texture_type,
+                                    PIPE_MAX_VERTEX_SAMPLERS); /* textures */
 
       context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
@@ -78,7 +126,7 @@ init_globals(struct draw_llvm *llvm)
                              llvm->target, context_type, 1);
       LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
                              llvm->target, context_type,
-                             DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+                             DRAW_JIT_CTX_TEXTURES);
       LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
                            llvm->target, context_type);
 
@@ -160,9 +208,11 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems)
 struct draw_llvm *
 draw_llvm_create(struct draw_context *draw)
 {
-   struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+   struct draw_llvm *llvm;
 
-   util_cpu_detect();
+   llvm = CALLOC_STRUCT( draw_llvm );
+   if (!llvm)
+      return NULL;
 
    llvm->draw = draw;
    llvm->engine = draw->engine;
@@ -178,27 +228,50 @@ draw_llvm_create(struct draw_context *draw)
 
    llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
    LLVMAddTargetData(llvm->target, llvm->pass);
-   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-    * but there are more on SVN. */
-   /* TODO: Add more passes */
-   LLVMAddConstantPropagationPass(llvm->pass);
-   if(util_cpu_caps.has_sse4_1) {
-      /* FIXME: There is a bug in this pass, whereby the combination of fptosi
-       * and sitofp (necessary for trunc/floor/ceil/round implementation)
-       * somehow becomes invalid code.
+
+   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+       * but there are more on SVN. */
+      /* TODO: Add more passes */
+
+      LLVMAddCFGSimplificationPass(llvm->pass);
+
+      if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
+         /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
+          * avoid generating bad code.
+          * Test with piglit glsl-vs-sqrt-zero test.
+          */
+         LLVMAddConstantPropagationPass(llvm->pass);
+         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+      }
+      else {
+         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+         LLVMAddConstantPropagationPass(llvm->pass);
+      }
+
+      if(util_cpu_caps.has_sse4_1) {
+         /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+          * and sitofp (necessary for trunc/floor/ceil/round implementation)
+          * somehow becomes invalid code.
+          */
+         LLVMAddInstructionCombiningPass(llvm->pass);
+      }
+      LLVMAddGVNPass(llvm->pass);
+   } else {
+      /* We need at least this pass to prevent the backends from failing in
+       * unexpected ways.
        */
-      LLVMAddInstructionCombiningPass(llvm->pass);
+      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
    }
-   LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
-   LLVMAddGVNPass(llvm->pass);
-   LLVMAddCFGSimplificationPass(llvm->pass);
 
    init_globals(llvm);
 
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      LLVMDumpModule(llvm->module);
+   }
 
-#if 0
-   LLVMDumpModule(llvm->module);
-#endif
+   llvm->nr_variants = 0;
+   make_empty_list(&llvm->vs_variants_list);
 
    return llvm;
 }
@@ -206,19 +279,40 @@ draw_llvm_create(struct draw_context *draw)
 void
 draw_llvm_destroy(struct draw_llvm *llvm)
 {
-   free(llvm);
+   LLVMDisposePassManager(llvm->pass);
+
+   FREE(llvm);
 }
 
 struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+                        unsigned num_inputs,
+                        const struct draw_llvm_variant_key *key)
 {
-   struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+   struct draw_llvm_variant *variant;
+   struct llvm_vertex_shader *shader =
+      llvm_vertex_shader(llvm->draw->vs.vertex_shader);
+
+   variant = MALLOC(sizeof *variant +
+                   shader->variant_key_size -
+                   sizeof variant->key);
+   if (variant == NULL)
+      return NULL;
 
-   draw_llvm_make_variant_key(llvm, &variant->key);
+   variant->llvm = llvm;
+
+   memcpy(&variant->key, key, shader->variant_key_size);
 
    llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
 
    draw_llvm_generate(llvm, variant);
+   draw_llvm_generate_elts(llvm, variant);
+
+   variant->shader = shader;
+   variant->list_item_global.base = variant;
+   variant->list_item_local.base = variant;
+   /*variant->no = */shader->variants_created++;
+   variant->list_item_global.base = variant;
 
    return variant;
 }
@@ -228,11 +322,13 @@ generate_vs(struct draw_llvm *llvm,
             LLVMBuilderRef builder,
             LLVMValueRef (*outputs)[NUM_CHANNELS],
             const LLVMValueRef (*inputs)[NUM_CHANNELS],
-            LLVMValueRef context_ptr)
+            LLVMValueRef context_ptr,
+            struct lp_build_sampler_soa *draw_sampler)
 {
    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
    struct lp_type vs_type;
    LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+   struct lp_build_sampler_soa *sampler = 0;
 
    memset(&vs_type, 0, sizeof vs_type);
    vs_type.floating = TRUE; /* floating point values */
@@ -244,7 +340,14 @@ generate_vs(struct draw_llvm *llvm,
    num_vs = 4;              /* number of vertices per block */
 #endif
 
-   /*tgsi_dump(tokens, 0);*/
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      tgsi_dump(tokens, 0);
+   }
+
+   if (llvm->draw->num_sampler_views &&
+       llvm->draw->num_samplers)
+      sampler = draw_sampler;
+
    lp_build_tgsi_soa(builder,
                      tokens,
                      vs_type,
@@ -253,7 +356,8 @@ generate_vs(struct draw_llvm *llvm,
                      NULL /*pos*/,
                      inputs,
                      outputs,
-                     NULL/*sampler*/);
+                     sampler,
+                     &llvm->draw->vs.vertex_shader->info);
 }
 
 #if DEBUG_STORE
@@ -280,16 +384,30 @@ generate_fetch(LLVMBuilderRef builder,
                LLVMValueRef *res,
                struct pipe_vertex_element *velem,
                LLVMValueRef vbuf,
-               LLVMValueRef index)
+               LLVMValueRef index,
+               LLVMValueRef instance_id)
 {
    LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
    LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
                                            &indices, 1, "");
    LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
+   LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
    LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
-   LLVMValueRef stride = LLVMBuildMul(builder,
-                                      vb_stride,
-                                      index, "");
+   LLVMValueRef cond;
+   LLVMValueRef stride;
+
+   if (velem->instance_divisor) {
+      /* array index = instance_id / instance_divisor */
+      index = LLVMBuildUDiv(builder, instance_id,
+                            LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
+                            "instance_divisor");
+   }
+
+   /* limit index to min(index, vb_max_index) */
+   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+   index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
+
+   stride = LLVMBuildMul(builder, vb_stride, index, "");
 
    vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
 
@@ -554,20 +672,22 @@ convert_to_aos(LLVMBuilderRef builder,
 static void
 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 {
-   LLVMTypeRef arg_types[7];
+   LLVMTypeRef arg_types[8];
    LLVMTypeRef func_type;
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef start, end, count, stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+   LLVMValueRef instance_id;
    struct draw_context *draw = llvm->draw;
    unsigned i, j;
    struct lp_build_context bld;
    struct lp_build_loop_state lp_loop;
-   struct lp_type vs_type = lp_type_float_vec(32);
    const int max_vertices = 4;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   void *code;
+   struct lp_build_sampler_soa *sampler = 0;
 
    arg_types[0] = llvm->context_ptr_type;           /* context */
    arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
@@ -576,6 +696,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    arg_types[4] = LLVMInt32Type();                  /* count */
    arg_types[5] = LLVMInt32Type();                  /* stride */
    arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
+   arg_types[7] = LLVMInt32Type();                  /* instance_id */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
@@ -592,6 +713,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    count        = LLVMGetParam(variant->function, 4);
    stride       = LLVMGetParam(variant->function, 5);
    vb_ptr       = LLVMGetParam(variant->function, 6);
+   instance_id  = LLVMGetParam(variant->function, 7);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
@@ -600,6 +722,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    lp_build_name(count, "count");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
+   lp_build_name(instance_id, "instance_id");
 
    /*
     * Function body
@@ -609,12 +732,17 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   lp_build_context_init(&bld, builder, vs_type);
+   lp_build_context_init(&bld, builder, lp_type_int(32));
 
    end = lp_build_add(&bld, start, count);
 
    step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
 
+   /* code generated texture sampling */
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
+
 #if DEBUG_STORE
    lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
                    start, end, step);
@@ -645,7 +773,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
                                            &vb_index, 1, "");
             generate_fetch(builder, vbuffers_ptr,
-                           &aos_attribs[j][i], velem, vb, true_index);
+                           &aos_attribs[j][i], velem, vb, true_index,
+                           instance_id);
          }
       }
       convert_to_soa(builder, aos_attribs, inputs,
@@ -656,7 +785,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
                   builder,
                   outputs,
                   ptr_aos,
-                  context_ptr);
+                  context_ptr,
+                  sampler);
 
       convert_to_aos(builder, io, outputs,
                      draw->vs.vertex_shader->info.num_outputs,
@@ -664,6 +794,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    }
    lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
 
+   sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+   /* Avoid corrupting the FPU stack on 32bit OSes. */
+   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
@@ -673,36 +810,291 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
     */
 #ifdef DEBUG
    if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
-      LLVMDumpValue(variant->function);
+      lp_debug_dump_value(variant->function);
       assert(0);
    }
 #endif
 
    LLVMRunFunctionPassManager(llvm->pass, variant->function);
 
-   if (0) {
-      LLVMDumpValue(variant->function);
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      lp_debug_dump_value(variant->function);
       debug_printf("\n");
    }
-   variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
 
-   if (0)
-      lp_disassemble(variant->jit_func);
+   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
+   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
+
+   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+      lp_disassemble(code);
+   }
+   lp_func_delete_body(variant->function);
 }
 
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
-                           struct draw_llvm_variant_key *key)
+
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 {
-   memset(key, 0, sizeof(struct draw_llvm_variant_key));
+   LLVMTypeRef arg_types[8];
+   LLVMTypeRef func_type;
+   LLVMValueRef context_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
+   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+   LLVMValueRef instance_id;
+   struct draw_context *draw = llvm->draw;
+   unsigned i, j;
+   struct lp_build_context bld;
+   struct lp_build_loop_state lp_loop;
+   const int max_vertices = 4;
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef fetch_max;
+   void *code;
+   struct lp_build_sampler_soa *sampler = 0;
+
+   arg_types[0] = llvm->context_ptr_type;               /* context */
+   arg_types[1] = llvm->vertex_header_ptr_type;         /* vertex_header */
+   arg_types[2] = llvm->buffer_ptr_type;                /* vbuffers */
+   arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0);  /* fetch_elts * */
+   arg_types[4] = LLVMInt32Type();                      /* fetch_count */
+   arg_types[5] = LLVMInt32Type();                      /* stride */
+   arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
+   arg_types[7] = LLVMInt32Type();                      /* instance_id */
 
+   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
+                                            func_type);
+   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+   for(i = 0; i < Elements(arg_types); ++i)
+      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+                          LLVMNoAliasAttribute);
+
+   context_ptr  = LLVMGetParam(variant->function_elts, 0);
+   io_ptr       = LLVMGetParam(variant->function_elts, 1);
+   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
+   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
+   fetch_count  = LLVMGetParam(variant->function_elts, 4);
+   stride       = LLVMGetParam(variant->function_elts, 5);
+   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
+   instance_id  = LLVMGetParam(variant->function_elts, 7);
+
+   lp_build_name(context_ptr, "context");
+   lp_build_name(io_ptr, "io");
+   lp_build_name(vbuffers_ptr, "vbuffers");
+   lp_build_name(fetch_elts, "fetch_elts");
+   lp_build_name(fetch_count, "fetch_count");
+   lp_build_name(stride, "stride");
+   lp_build_name(vb_ptr, "vb");
+   lp_build_name(instance_id, "instance_id");
+
+   /*
+    * Function body
+    */
+
+   block = LLVMAppendBasicBlock(variant->function_elts, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   lp_build_context_init(&bld, builder, lp_type_int(32));
+
+   step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+   /* code generated texture sampling */
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
+
+   fetch_max = LLVMBuildSub(builder, fetch_count,
+                            LLVMConstInt(LLVMInt32Type(), 1, 0),
+                            "fetch_max");
+
+   lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
+   {
+      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+      LLVMValueRef io;
+      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+      io_itr = lp_loop.counter;
+      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
+#if DEBUG_STORE
+      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+                      io_itr, io, lp_loop.counter);
+#endif
+      for (i = 0; i < NUM_CHANNELS; ++i) {
+         LLVMValueRef true_index = LLVMBuildAdd(
+            builder,
+            lp_loop.counter,
+            LLVMConstInt(LLVMInt32Type(), i, 0), "");
+         LLVMValueRef fetch_ptr;
+
+         /* make sure we're not out of bounds which can happen
+          * if fetch_count % 4 != 0, because on the last iteration
+          * a few of the 4 vertex fetches will be out of bounds */
+         true_index = lp_build_min(&bld, true_index, fetch_max);
+
+         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+                                  &true_index, 1, "");
+         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+            LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+                                                 velem->vertex_buffer_index,
+                                                 0);
+            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+                                           &vb_index, 1, "");
+            generate_fetch(builder, vbuffers_ptr,
+                           &aos_attribs[j][i], velem, vb, true_index,
+                           instance_id);
+         }
+      }
+      convert_to_soa(builder, aos_attribs, inputs,
+                     draw->pt.nr_vertex_elements);
+
+      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+      generate_vs(llvm,
+                  builder,
+                  outputs,
+                  ptr_aos,
+                  context_ptr,
+                  sampler);
+
+      convert_to_aos(builder, io, outputs,
+                     draw->vs.vertex_shader->info.num_outputs,
+                     max_vertices);
+   }
+   lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+
+   sampler->destroy(sampler);
+
+#ifdef PIPE_ARCH_X86
+   /* Avoid corrupting the FPU stack on 32bit OSes. */
+   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+
+   /*
+    * Translate the LLVM IR into machine code.
+    */
+#ifdef DEBUG
+   if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
+      lp_debug_dump_value(variant->function_elts);
+      assert(0);
+   }
+#endif
+
+   LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
+
+   if (gallivm_debug & GALLIVM_DEBUG_IR) {
+      lp_debug_dump_value(variant->function_elts);
+      debug_printf("\n");
+   }
+
+   code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
+   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
+
+   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+      lp_disassemble(code);
+   }
+   lp_func_delete_body(variant->function_elts);
+}
+
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
+{
+   unsigned i;
+   struct draw_llvm_variant_key *key;
+   struct lp_sampler_static_state *sampler;
+
+   key = (struct draw_llvm_variant_key *)store;
+
+   /* Presumably all variants of the shader should have the same
+    * number of vertex elements, i.e. the number of shader inputs.
+    */
    key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
 
+   /* All variants of this shader will have the same value for
+    * nr_samplers.  Not yet trying to compact away holes in the
+    * sampler array.
+    */
+   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+   sampler = draw_llvm_variant_key_samplers(key);
+
    memcpy(key->vertex_element,
           llvm->draw->pt.vertex_element,
           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+   
+   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
+
+   for (i = 0 ; i < key->nr_samplers; i++) {
+      lp_sampler_static_state(&sampler[i],
+                             llvm->draw->sampler_views[i],
+                             llvm->draw->samplers[i]);
+   }
+
+   return key;
+}
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+                             unsigned sampler_idx,
+                             uint32_t width, uint32_t height, uint32_t depth,
+                             uint32_t last_level,
+                             uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+                             uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+                             const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+   unsigned j;
+   struct draw_jit_texture *jit_tex;
+
+   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
+
+
+   jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
+
+   jit_tex->width = width;
+   jit_tex->height = height;
+   jit_tex->depth = depth;
+   jit_tex->last_level = last_level;
+
+   for (j = 0; j <= last_level; j++) {
+      jit_tex->data[j] = data[j];
+      jit_tex->row_stride[j] = row_stride[j];
+      jit_tex->img_stride[j] = img_stride[j];
+   }
+}
+
+void
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
+{
+   struct draw_llvm *llvm = variant->llvm;
+   struct draw_context *draw = llvm->draw;
+
+   if (variant->function_elts) {
+      if (variant->function_elts)
+         LLVMFreeMachineCodeForFunction(draw->engine,
+                                        variant->function_elts);
+      LLVMDeleteFunction(variant->function_elts);
+   }
+
+   if (variant->function) {
+      if (variant->function)
+         LLVMFreeMachineCodeForFunction(draw->engine,
+                                        variant->function);
+      LLVMDeleteFunction(variant->function);
+   }
 
-   memcpy(&key->vs,
-          &llvm->draw->vs.vertex_shader->state,
-          sizeof(struct pipe_shader_state));
+   remove_from_list(&variant->list_item_local);
+   variant->shader->variants_cached--;
+   remove_from_list(&variant->list_item_global);
+   llvm->nr_variants--;
+   FREE(variant);
 }