llvmpipe: Add vertex id support.
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
index 56c26f57ccedeb392d2d73b3ac7d08c7226eb8ba..e08221eb3929a820130bf8d3a1c5d13f43d5ae19 100644 (file)
@@ -65,8 +65,13 @@ static void
 draw_llvm_garbage_collect_callback(void *cb_data)
 {
    struct draw_llvm *llvm = (struct draw_llvm *) cb_data;
+   struct draw_context *draw = llvm->draw;
    struct draw_llvm_variant_list_item *li;
 
+   /* Ensure prepare will be run and shaders recompiled */
+   assert(!draw->suspend_flushing);
+   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
    /* free all shader variants */
    li = first_elem(&llvm->vs_variants_list);
    while (!at_end(&llvm->vs_variants_list, li)) {
@@ -86,17 +91,15 @@ draw_llvm_garbage_collect_callback(void *cb_data)
 
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
-
-static void
-draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
+                   boolean elts);
 
 
 /**
  * Create LLVM type for struct draw_jit_texture
  */
 static LLVMTypeRef
-create_jit_texture_type(struct gallivm_state *gallivm)
+create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef texture_type;
@@ -123,10 +126,14 @@ create_jit_texture_type(struct gallivm_state *gallivm)
    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
 
+#if HAVE_LLVM < 0x0300
+   LLVMAddTypeName(gallivm->module, struct_name, texture_type);
+
    /* Make sure the target's struct layout cache doesn't return
     * stale/invalid data.
     */
    LLVMInvalidateStructLayout(gallivm->target, texture_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
                           target, texture_type,
@@ -176,7 +183,7 @@ create_jit_texture_type(struct gallivm_state *gallivm)
  */
 static LLVMTypeRef
 create_jit_context_type(struct gallivm_state *gallivm,
-                        LLVMTypeRef texture_type)
+                        LLVMTypeRef texture_type, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
@@ -185,15 +192,18 @@ create_jit_context_type(struct gallivm_state *gallivm,
 
    elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */
    elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */
-   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 12), 0); /* planes */
+   elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
+                                                 DRAW_TOTAL_CLIP_PLANES), 0);
    elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
    elem_types[4] = LLVMArrayType(texture_type,
                                  PIPE_MAX_VERTEX_SAMPLERS); /* textures */
-
    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
+#if HAVE_LLVM < 0x0300
+   LLVMAddTypeName(gallivm->module, struct_name, context_type);
 
    LLVMInvalidateStructLayout(gallivm->target, context_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
                           target, context_type, 0);
@@ -215,20 +225,24 @@ create_jit_context_type(struct gallivm_state *gallivm,
  * Create LLVM type for struct pipe_vertex_buffer
  */
 static LLVMTypeRef
-create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
+create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
-   LLVMTypeRef elem_types[3];
+   LLVMTypeRef elem_types[4];
    LLVMTypeRef vb_type;
 
    elem_types[0] =
    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
-   elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
+   elem_types[2] =
+   elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
 
    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                      Elements(elem_types), 0);
+#if HAVE_LLVM < 0x0300
+   LLVMAddTypeName(gallivm->module, struct_name, vb_type);
 
    LLVMInvalidateStructLayout(gallivm->target, vb_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
                           target, vb_type, 0);
@@ -248,20 +262,24 @@ static LLVMTypeRef
 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
 {
    LLVMTargetDataRef target = gallivm->target;
-   LLVMTypeRef elem_types[3];
+   LLVMTypeRef elem_types[4];
    LLVMTypeRef vertex_header;
    char struct_name[24];
 
    util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
 
-   elem_types[0]  = LLVMIntTypeInContext(gallivm->context, 32);
-   elem_types[1]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
-   elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
+   elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
+   elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
+   elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
+   elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
 
    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
                                            Elements(elem_types), 0);
+#if HAVE_LLVM < 0x0300
+   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
 
    LLVMInvalidateStructLayout(gallivm->target, vertex_header);
+#endif
 
    /* these are bit-fields and we can't take address of them
       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
@@ -280,11 +298,15 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
                           target, vertex_header,
                           DRAW_JIT_VERTEX_CLIP);
+   LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
+                          target, vertex_header,
+                          DRAW_JIT_VERTEX_PRE_CLIP_POS);
    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
                           target, vertex_header,
                           DRAW_JIT_VERTEX_DATA);
 
-   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
+   assert(LLVMABISizeOfType(target, vertex_header) ==
+          offsetof(struct vertex_header, data[data_elems]));
 
    return vertex_header;
 }
@@ -299,19 +321,15 @@ create_jit_types(struct draw_llvm *llvm)
    struct gallivm_state *gallivm = llvm->gallivm;
    LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
 
-   texture_type = create_jit_texture_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "texture", texture_type);
+   texture_type = create_jit_texture_type(gallivm, "texture");
 
-   context_type = create_jit_context_type(gallivm, texture_type);
-   LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type);
+   context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context");
    llvm->context_ptr_type = LLVMPointerType(context_type, 0);
 
    buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
-   LLVMAddTypeName(gallivm->module, "buffer", buffer_type);
    llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
 
-   vb_type = create_jit_vertex_buffer_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type);
+   vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
    llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
 }
 
@@ -423,8 +441,8 @@ draw_llvm_create_variant(struct draw_llvm *llvm,
 
    llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
 
-   draw_llvm_generate(llvm, variant);
-   draw_llvm_generate_elts(llvm, variant);
+   draw_llvm_generate(llvm, variant, FALSE);  /* linear */
+   draw_llvm_generate(llvm, variant, TRUE);   /* elts */
 
    variant->shader = shader;
    variant->list_item_global.base = variant;
@@ -439,9 +457,9 @@ draw_llvm_create_variant(struct draw_llvm *llvm,
 static void
 generate_vs(struct draw_llvm *llvm,
             LLVMBuilderRef builder,
-            LLVMValueRef (*outputs)[NUM_CHANNELS],
-            const LLVMValueRef (*inputs)[NUM_CHANNELS],
-            LLVMValueRef system_values_array,
+            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
+            const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
+            const struct lp_bld_tgsi_system_values *system_values,
             LLVMValueRef context_ptr,
             struct lp_build_sampler_soa *draw_sampler,
             boolean clamp_vertex_color)
@@ -473,14 +491,14 @@ generate_vs(struct draw_llvm *llvm,
                      vs_type,
                      NULL /*struct lp_build_mask_context *mask*/,
                      consts_ptr,
-                     system_values_array,
+                     system_values,
                      NULL /*pos*/,
                      inputs,
                      outputs,
                      sampler,
                      &llvm->draw->vs.vertex_shader->info);
 
-   if (clamp_vertex_color) {
+   {
       LLVMValueRef out;
       unsigned chan, attrib;
       struct lp_build_context bld;
@@ -488,14 +506,22 @@ generate_vs(struct draw_llvm *llvm,
       lp_build_context_init(&bld, llvm->gallivm, vs_type);
 
       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
-         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
             if (outputs[attrib][chan]) {
                switch (info->output_semantic_name[attrib]) {
                case TGSI_SEMANTIC_COLOR:
                case TGSI_SEMANTIC_BCOLOR:
-                  out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-                  out = lp_build_clamp(&bld, out, bld.zero, bld.one);
-                  LLVMBuildStore(builder, out, outputs[attrib][chan]);
+                  if (clamp_vertex_color) {
+                     out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+                     out = lp_build_clamp(&bld, out, bld.zero, bld.one);
+                     LLVMBuildStore(builder, out, outputs[attrib][chan]);
+                  }
+                  break;
+               case TGSI_SEMANTIC_FOG:
+                  if (chan == 1 || chan == 2)
+                     LLVMBuildStore(builder, bld.zero, outputs[attrib][chan]);
+                  else if (chan == 3)
+                     LLVMBuildStore(builder, bld.one, outputs[attrib][chan]);
                   break;
                }
             }
@@ -614,19 +640,19 @@ aos_to_soa(struct gallivm_state *gallivm,
 
 static void
 soa_to_aos(struct gallivm_state *gallivm,
-           LLVMValueRef soa[NUM_CHANNELS],
-           LLVMValueRef aos[NUM_CHANNELS])
+           LLVMValueRef soa[TGSI_NUM_CHANNELS],
+           LLVMValueRef aos[TGSI_NUM_CHANNELS])
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef comp;
    int i = 0;
 
-   debug_assert(NUM_CHANNELS == 4);
+   debug_assert(TGSI_NUM_CHANNELS == 4);
 
    aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
    aos[1] = aos[2] = aos[3] = aos[0];
 
-   for (i = 0; i < NUM_CHANNELS; ++i) {
+   for (i = 0; i < TGSI_NUM_CHANNELS; ++i) {
       LLVMValueRef channel = lp_build_const_int32(gallivm, i);
 
       comp = LLVMBuildExtractElement(builder, soa[i],
@@ -651,13 +677,13 @@ soa_to_aos(struct gallivm_state *gallivm,
 
 static void
 convert_to_soa(struct gallivm_state *gallivm,
-               LLVMValueRef (*aos)[NUM_CHANNELS],
-               LLVMValueRef (*soa)[NUM_CHANNELS],
+               LLVMValueRef (*aos)[TGSI_NUM_CHANNELS],
+               LLVMValueRef (*soa)[TGSI_NUM_CHANNELS],
                int num_attribs)
 {
    int i;
 
-   debug_assert(NUM_CHANNELS == 4);
+   debug_assert(TGSI_NUM_CHANNELS == 4);
 
    for (i = 0; i < num_attribs; ++i) {
       LLVMValueRef val0 = aos[i][0];
@@ -682,23 +708,29 @@ store_aos(struct gallivm_state *gallivm,
           LLVMValueRef io_ptr,
           LLVMValueRef index,
           LLVMValueRef value,
-          LLVMValueRef clipmask)
+          LLVMValueRef clipmask, boolean have_clipdist)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr);
    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
    LLVMValueRef indices[3];
-   LLVMValueRef val, shift;
+   LLVMValueRef val;
+   int vertex_id_pad_edgeflag;
 
    indices[0] = lp_build_const_int32(gallivm, 0);
    indices[1] = index;
    indices[2] = lp_build_const_int32(gallivm, 0);
 
-   /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
-   val = lp_build_const_int32(gallivm, 0xffff1);
-   shift = lp_build_const_int32(gallivm, 12);
-   val = LLVMBuildShl(builder, val, shift, "");
-   /* add clipmask:12 */   
+   /* If this assertion fails, it means we need to update the bit twiddling
+    * code here.  See struct vertex_header in draw_private.h.
+    */
+   assert(DRAW_TOTAL_CLIP_PLANES==14);
+   /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
+   vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
+   if (have_clipdist)
+      vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
+   val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
+   /* OR with the clipmask */
    val = LLVMBuildOr(builder, val, clipmask, "");               
 
    /* store vertex header */
@@ -757,10 +789,11 @@ store_aos(struct gallivm_state *gallivm,
 static void
 store_aos_array(struct gallivm_state *gallivm,
                 LLVMValueRef io_ptr,
-                LLVMValueRef aos[NUM_CHANNELS],
+                LLVMValueRef aos[TGSI_NUM_CHANNELS],
                 int attrib,
                 int num_outputs,
-                LLVMValueRef clipmask)
+                LLVMValueRef clipmask,
+                boolean have_clipdist)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
@@ -771,7 +804,7 @@ store_aos_array(struct gallivm_state *gallivm,
    LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
    LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
    
-   debug_assert(NUM_CHANNELS == 4);
+   debug_assert(TGSI_NUM_CHANNELS == 4);
 
    io0_ptr = LLVMBuildGEP(builder, io_ptr,
                           &ind0, 1, "");
@@ -796,20 +829,20 @@ store_aos_array(struct gallivm_state *gallivm,
                    io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
 #endif
    /* store for each of the 4 vertices */
-   store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0);
-   store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1);
-   store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2);
-   store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3);
+   store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0, have_clipdist);
+   store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1, have_clipdist);
+   store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2, have_clipdist);
+   store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3, have_clipdist);
 }
 
 
 static void
 convert_to_aos(struct gallivm_state *gallivm,
                LLVMValueRef io,
-               LLVMValueRef (*outputs)[NUM_CHANNELS],
+               LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                LLVMValueRef clipmask,
                int num_outputs,
-               int max_vertices)
+               int max_vertices, boolean have_clipdist)
 {
    LLVMBuilderRef builder = gallivm->builder;
    unsigned chan, attrib;
@@ -820,7 +853,7 @@ convert_to_aos(struct gallivm_state *gallivm,
    for (attrib = 0; attrib < num_outputs; ++attrib) {
       LLVMValueRef soa[4];
       LLVMValueRef aos[4];
-      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
          if (outputs[attrib][chan]) {
             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
@@ -840,7 +873,7 @@ convert_to_aos(struct gallivm_state *gallivm,
                       aos,
                       attrib,
                       num_outputs,
-                      clipmask);
+                      clipmask, have_clipdist);
    }
 #if DEBUG_STORE
    lp_build_printf(builder, "   # storing end\n");
@@ -852,11 +885,14 @@ convert_to_aos(struct gallivm_state *gallivm,
  * Stores original vertex positions in clip coordinates
  * There is probably a more efficient way to do this, 4 floats at once
  * rather than extracting each element one by one.
+ * idx is the shader output to read from; if pre_clip_pos is set it is
+ * stored to the header's pre_clip_pos field, otherwise to its clip field.
  */
 static void
 store_clip(struct gallivm_state *gallivm,
            LLVMValueRef io_ptr,           
-           LLVMValueRef (*outputs)[NUM_CHANNELS])
+           LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
+           boolean pre_clip_pos, int idx)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef out[4];
@@ -875,20 +911,27 @@ store_clip(struct gallivm_state *gallivm,
    indices[0] =
    indices[1] = lp_build_const_int32(gallivm, 0);
    
-   out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
-   out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
-   out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
-   out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/  
+   out[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 x2 x3*/
+   out[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 y2 y3*/
+   out[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 z2 z3*/
+   out[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 w2 w3*/
 
    io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
    io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
    io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
    io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
 
-   clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr);
-   clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr);
-   clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr);
-   clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr);
+   if (!pre_clip_pos) {
+      clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr);
+      clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr);
+      clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr);
+      clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr);
+   } else {
+      clip_ptr0 = draw_jit_header_pre_clip_pos(gallivm, io0_ptr);
+      clip_ptr1 = draw_jit_header_pre_clip_pos(gallivm, io1_ptr);
+      clip_ptr2 = draw_jit_header_pre_clip_pos(gallivm, io2_ptr);
+      clip_ptr3 = draw_jit_header_pre_clip_pos(gallivm, io3_ptr);
+   }
 
    for (i = 0; i<4; i++) {
       clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */
@@ -940,7 +983,7 @@ vec4f_from_scalar(struct gallivm_state *gallivm,
 static void
 generate_viewport(struct draw_llvm *llvm,
                   LLVMBuilderRef builder,
-                  LLVMValueRef (*outputs)[NUM_CHANNELS],
+                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                   LLVMValueRef context_ptr)
 {
    int i;
@@ -990,34 +1033,62 @@ generate_viewport(struct draw_llvm *llvm,
  * Returns clipmask as 4xi32 bitmask for the 4 vertices
  */
 static LLVMValueRef 
-generate_clipmask(struct gallivm_state *gallivm,
-                  LLVMValueRef (*outputs)[NUM_CHANNELS],
+generate_clipmask(struct draw_llvm *llvm,
+                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                   boolean clip_xy,
                   boolean clip_z,
                   boolean clip_user,
                   boolean clip_halfz,
-                  unsigned nr,
-                  LLVMValueRef context_ptr)
+                  unsigned ucp_enable,
+                  LLVMValueRef context_ptr,
+                  boolean *have_clipdist)
 {
+   struct gallivm_state *gallivm = llvm->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef mask; /* stores the <4xi32> clipmasks */     
    LLVMValueRef test, temp; 
    LLVMValueRef zero, shift;
    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
+   LLVMValueRef cv_x, cv_y, cv_z, cv_w;
    LLVMValueRef plane1, planes, plane_ptr, sum;
-   unsigned i;
    struct lp_type f32_type = lp_type_float_vec(32); 
+   const unsigned pos = draw_current_shader_position_output(llvm->draw);
+   const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
+   int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
+   bool have_cd = false;
+   unsigned cd[2];
+
+   cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
+   cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
+  
+   if (cd[0] != pos || cd[1] != pos)
+      have_cd = true;
 
    mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
    temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);
    zero = lp_build_const_vec(gallivm, f32_type, 0);                    /* 0.0f 0.0f 0.0f 0.0f */
    shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1);    /* 1 1 1 1 */
 
-   /* Assuming position stored at output[0] */
-   pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
-   pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
-   pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
-   pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/   
+   /*
+    * load clipvertex and position from correct locations.
+    * if they are the same just load them once.
+    */
+   pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 x2 x3*/
+   pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 y2 y3*/
+   pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 z2 z3*/
+   pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 w2 w3*/
+
+   if (clip_user && cv != pos) {
+      cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 x2 x3*/
+      cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 y2 y3*/
+      cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 z2 z3*/
+      cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 w2 w3*/
+   } else {
+      cv_x = pos_x;
+      cv_y = pos_y;
+      cv_z = pos_z;
+      cv_w = pos_w;
+   }
 
    /* Cliptest, for hardwired planes */
    if (clip_xy) {
@@ -1073,44 +1144,64 @@ generate_clipmask(struct gallivm_state *gallivm,
    if (clip_user) {
       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
       LLVMValueRef indices[3];
-      temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 32);
 
       /* userclip planes */
-      for (i = 6; i < nr; i++) {
-         indices[0] = lp_build_const_int32(gallivm, 0);
-         indices[1] = lp_build_const_int32(gallivm, i);
-
-         indices[2] = lp_build_const_int32(gallivm, 0);
-         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
-         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
-         planes = vec4f_from_scalar(gallivm, plane1, "plane4_x");
-         sum = LLVMBuildFMul(builder, planes, pos_x, "");
-
-         indices[2] = lp_build_const_int32(gallivm, 1);
-         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
-         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); 
-         planes = vec4f_from_scalar(gallivm, plane1, "plane4_y");
-         test = LLVMBuildFMul(builder, planes, pos_y, "");
-         sum = LLVMBuildFAdd(builder, sum, test, "");
-         
-         indices[2] = lp_build_const_int32(gallivm, 2);
-         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
-         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); 
-         planes = vec4f_from_scalar(gallivm, plane1, "plane4_z");
-         test = LLVMBuildFMul(builder, planes, pos_z, "");
-         sum = LLVMBuildFAdd(builder, sum, test, "");
-
-         indices[2] = lp_build_const_int32(gallivm, 3);
-         plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
-         plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); 
-         planes = vec4f_from_scalar(gallivm, plane1, "plane4_w");
-         test = LLVMBuildFMul(builder, planes, pos_w, "");
-         sum = LLVMBuildFAdd(builder, sum, test, "");
-
-         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
-         temp = LLVMBuildShl(builder, temp, shift, "");
-         test = LLVMBuildAnd(builder, test, temp, ""); 
-         mask = LLVMBuildOr(builder, mask, test, "");
+      while (ucp_enable) {
+         unsigned plane_idx = ffs(ucp_enable)-1;
+         ucp_enable &= ~(1 << plane_idx);
+         plane_idx += 6;
+
+         if (have_cd && num_written_clipdistance) {
+            LLVMValueRef clipdist;
+            int i;
+            i = plane_idx - 6;
+
+            *have_clipdist = TRUE;
+            if (i < 4) {
+               clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
+            } else {
+               clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
+            }
+            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
+            temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx);
+            test = LLVMBuildAnd(builder, test, temp, "");
+            mask = LLVMBuildOr(builder, mask, test, "");
+         } else {
+            indices[0] = lp_build_const_int32(gallivm, 0);
+            indices[1] = lp_build_const_int32(gallivm, plane_idx);
+
+            indices[2] = lp_build_const_int32(gallivm, 0);
+            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
+            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
+            planes = vec4f_from_scalar(gallivm, plane1, "plane4_x");
+            sum = LLVMBuildFMul(builder, planes, cv_x, "");
+
+            indices[2] = lp_build_const_int32(gallivm, 1);
+            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
+            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
+            planes = vec4f_from_scalar(gallivm, plane1, "plane4_y");
+            test = LLVMBuildFMul(builder, planes, cv_y, "");
+            sum = LLVMBuildFAdd(builder, sum, test, "");
+
+            indices[2] = lp_build_const_int32(gallivm, 2);
+            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
+            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
+            planes = vec4f_from_scalar(gallivm, plane1, "plane4_z");
+            test = LLVMBuildFMul(builder, planes, cv_z, "");
+            sum = LLVMBuildFAdd(builder, sum, test, "");
+
+            indices[2] = lp_build_const_int32(gallivm, 3);
+            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
+            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
+            planes = vec4f_from_scalar(gallivm, plane1, "plane4_w");
+            test = LLVMBuildFMul(builder, planes, cv_w, "");
+            sum = LLVMBuildFAdd(builder, sum, test, "");
+
+            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
+            temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx);
+            test = LLVMBuildAnd(builder, test, temp, "");
+            mask = LLVMBuildOr(builder, mask, test, "");
+         }
       }
    }
    return mask;
@@ -1142,7 +1233,8 @@ clipmask_bool(struct gallivm_state *gallivm,
 
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
+                   boolean elts)
 {
    struct gallivm_state *gallivm = llvm->gallivm;
    LLVMContextRef context = gallivm->context;
@@ -1152,359 +1244,201 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
-   LLVMValueRef start, end, count, stride, step, io_itr;
+   LLVMValueRef end, start;
+   LLVMValueRef count, fetch_elts, fetch_count;
+   LLVMValueRef stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
-   LLVMValueRef instance_id;
-   LLVMValueRef system_values_array;
+   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+   LLVMValueRef one = lp_build_const_int32(gallivm, 1);
    struct draw_context *draw = llvm->draw;
    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
    unsigned i, j;
    struct lp_build_context bld;
    struct lp_build_loop_state lp_loop;
    const int max_vertices = 4;
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef fetch_max;
    void *code;
    struct lp_build_sampler_soa *sampler = 0;
    LLVMValueRef ret, ret_ptr;
-   boolean bypass_viewport = variant->key.bypass_viewport;
-   boolean enable_cliptest = variant->key.clip_xy || 
-                             variant->key.clip_z  ||
-                             variant->key.clip_user;
-   
+   const boolean bypass_viewport = variant->key.bypass_viewport;
+   const boolean enable_cliptest = variant->key.clip_xy || 
+                                   variant->key.clip_z  ||
+                                   variant->key.clip_user;
+   LLVMValueRef variant_func;
+   const unsigned pos = draw_current_shader_position_output(llvm->draw);
+   const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
+   boolean have_clipdist = FALSE;
+   struct lp_bld_tgsi_system_values system_values;
+
+   memset(&system_values, 0, sizeof(system_values));
+
    arg_types[0] = get_context_ptr_type(llvm);       /* context */
    arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */
    arg_types[2] = get_buffer_ptr_type(llvm);        /* vbuffers */
-   arg_types[3] = int32_type;                       /* start */
-   arg_types[4] = int32_type;                       /* count */
+   if (elts)
+      arg_types[3] = LLVMPointerType(int32_type, 0);/* fetch_elts * */
+   else
+      arg_types[3] = int32_type;                    /* start */
+   arg_types[4] = int32_type;                       /* fetch_count / count */
    arg_types[5] = int32_type;                       /* stride */
    arg_types[6] = get_vb_ptr_type(llvm);            /* pipe_vertex_buffer's */
    arg_types[7] = int32_type;                       /* instance_id */
 
    func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
 
-   variant->function = LLVMAddFunction(gallivm->module, "draw_llvm_shader",
-                                       func_type);
-   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
-   for (i = 0; i < Elements(arg_types); ++i)
-      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
-
-   context_ptr  = LLVMGetParam(variant->function, 0);
-   io_ptr       = LLVMGetParam(variant->function, 1);
-   vbuffers_ptr = LLVMGetParam(variant->function, 2);
-   start        = LLVMGetParam(variant->function, 3);
-   count        = LLVMGetParam(variant->function, 4);
-   stride       = LLVMGetParam(variant->function, 5);
-   vb_ptr       = LLVMGetParam(variant->function, 6);
-   instance_id  = LLVMGetParam(variant->function, 7);
-
-   lp_build_name(context_ptr, "context");
-   lp_build_name(io_ptr, "io");
-   lp_build_name(vbuffers_ptr, "vbuffers");
-   lp_build_name(start, "start");
-   lp_build_name(count, "count");
-   lp_build_name(stride, "stride");
-   lp_build_name(vb_ptr, "vb");
-   lp_build_name(instance_id, "instance_id");
-
-   /*
-    * Function body
-    */
-
-   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function, "entry");
-   builder = gallivm->builder;
-   assert(builder);
-   LLVMPositionBuilderAtEnd(builder, block);
-
-   lp_build_context_init(&bld, llvm->gallivm, lp_type_int(32));
-
-   system_values_array = lp_build_system_values_array(gallivm, vs_info,
-                                                      instance_id, NULL);
-
-   end = lp_build_add(&bld, start, count);
-
-   step = lp_build_const_int32(gallivm, max_vertices);
-
-   /* function will return non-zero i32 value if any clipped vertices */     
-   ret_ptr = lp_build_alloca(gallivm, int32_type, "");   
-   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
-
-   /* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(
-      draw_llvm_variant_key_samplers(&variant->key),
-      context_ptr);
-
-#if DEBUG_STORE
-   lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
-                   start, end, step);
-#endif
-   lp_build_loop_begin(&lp_loop, llvm->gallivm, start);
-   {
-      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
-      LLVMValueRef io;
-      LLVMValueRef clipmask;   /* holds the clipmask value */
-      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
-
-      io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
-      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
-#if DEBUG_STORE
-      lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
-                      io_itr, io, lp_loop.counter);
-#endif
-      for (i = 0; i < NUM_CHANNELS; ++i) {
-         LLVMValueRef true_index = LLVMBuildAdd(
-            builder,
-            lp_loop.counter,
-            lp_build_const_int32(gallivm, i), "");
-         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
-            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
-            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
-            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
-                                           &vb_index, 1, "");
-            generate_fetch(llvm->gallivm, vbuffers_ptr,
-                           &aos_attribs[j][i], velem, vb, true_index,
-                           instance_id);
-         }
-      }
-      convert_to_soa(gallivm, aos_attribs, inputs,
-                     draw->pt.nr_vertex_elements);
-
-      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
-      generate_vs(llvm,
-                  builder,
-                  outputs,
-                  ptr_aos,
-                  system_values_array,
-                  context_ptr,
-                  sampler,
-                  variant->key.clamp_vertex_color);
-
-      /* store original positions in clip before further manipulation */
-      store_clip(gallivm, io, outputs);
-
-      /* do cliptest */
-      if (enable_cliptest) {
-         /* allocate clipmask, assign it integer type */
-         clipmask = generate_clipmask(gallivm, outputs,
-                                      variant->key.clip_xy,
-                                      variant->key.clip_z, 
-                                      variant->key.clip_user,
-                                      variant->key.clip_halfz,
-                                      variant->key.nr_planes,
-                                      context_ptr);
-         /* return clipping boolean value for function */
-         clipmask_bool(gallivm, clipmask, ret_ptr);
-      }
-      else {
-         clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0);    
-      }
-      
-      /* do viewport mapping */
-      if (!bypass_viewport) {
-         generate_viewport(llvm, builder, outputs, context_ptr);
-      }
-
-      /* store clipmask in vertex header and positions in data */
-      convert_to_aos(gallivm, io, outputs, clipmask,
-                     vs_info->num_outputs, max_vertices);
-   }
+   variant_func = LLVMAddFunction(gallivm->module,
+                                  elts ? "draw_llvm_shader_elts" : "draw_llvm_shader",
+                                  func_type);
 
-   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
-
-   sampler->destroy(sampler);
-
-   ret = LLVMBuildLoad(builder, ret_ptr,"");
-   LLVMBuildRet(builder, ret);
-      
-   /*
-    * Translate the LLVM IR into machine code.
-    */
-#ifdef DEBUG
-   if (LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
-      lp_debug_dump_value(variant->function);
-      assert(0);
-   }
-#endif
-
-   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function);
+   if (elts)
+      variant->function_elts = variant_func;
+   else
+      variant->function = variant_func;
 
-   if (gallivm_debug & GALLIVM_DEBUG_IR) {
-      lp_debug_dump_value(variant->function);
-      debug_printf("\n");
-   }
-
-   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function);
-   variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
-
-   if (gallivm_debug & GALLIVM_DEBUG_ASM) {
-      lp_disassemble(code);
-   }
-   lp_func_delete_body(variant->function);
-}
-
-
-static void
-draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
-{
-   struct gallivm_state *gallivm = llvm->gallivm;
-   LLVMContextRef context = gallivm->context;
-   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[8];
-   LLVMTypeRef func_type;
-   LLVMValueRef context_ptr;
-   LLVMBasicBlockRef block;
-   LLVMBuilderRef builder;
-   LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
-   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
-   LLVMValueRef instance_id;
-   LLVMValueRef system_values_array;
-   struct draw_context *draw = llvm->draw;
-   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
-   unsigned i, j;
-   struct lp_build_context bld;
-   struct lp_build_loop_state lp_loop;
-   const int max_vertices = 4;
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
-   LLVMValueRef fetch_max;
-   void *code;
-   struct lp_build_sampler_soa *sampler = 0;
-   LLVMValueRef ret, ret_ptr;
-   boolean bypass_viewport = variant->key.bypass_viewport;
-   boolean enable_cliptest = variant->key.clip_xy || 
-                             variant->key.clip_z  ||
-                             variant->key.clip_user;
-   
-   arg_types[0] = get_context_ptr_type(llvm);           /* context */
-   arg_types[1] = get_vertex_header_ptr_type(llvm);     /* vertex_header */
-   arg_types[2] = get_buffer_ptr_type(llvm);            /* vbuffers */
-   arg_types[3] = LLVMPointerType(int32_type, 0);       /* fetch_elts * */
-   arg_types[4] = int32_type;                           /* fetch_count */
-   arg_types[5] = int32_type;                           /* stride */
-   arg_types[6] = get_vb_ptr_type(llvm);                /* pipe_vertex_buffer's */
-   arg_types[7] = int32_type;                           /* instance_id */
-
-   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
-
-   variant->function_elts = LLVMAddFunction(gallivm->module, "draw_llvm_shader_elts", func_type);
-   LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
    for (i = 0; i < Elements(arg_types); ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+         LLVMAddAttribute(LLVMGetParam(variant_func, i),
                           LLVMNoAliasAttribute);
 
-   context_ptr  = LLVMGetParam(variant->function_elts, 0);
-   io_ptr       = LLVMGetParam(variant->function_elts, 1);
-   vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
-   fetch_elts   = LLVMGetParam(variant->function_elts, 3);
-   fetch_count  = LLVMGetParam(variant->function_elts, 4);
-   stride       = LLVMGetParam(variant->function_elts, 5);
-   vb_ptr       = LLVMGetParam(variant->function_elts, 6);
-   instance_id  = LLVMGetParam(variant->function_elts, 7);
+   context_ptr               = LLVMGetParam(variant_func, 0);
+   io_ptr                    = LLVMGetParam(variant_func, 1);
+   vbuffers_ptr              = LLVMGetParam(variant_func, 2);
+   stride                    = LLVMGetParam(variant_func, 5);
+   vb_ptr                    = LLVMGetParam(variant_func, 6);
+   system_values.instance_id = LLVMGetParam(variant_func, 7);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
    lp_build_name(vbuffers_ptr, "vbuffers");
-   lp_build_name(fetch_elts, "fetch_elts");
-   lp_build_name(fetch_count, "fetch_count");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
-   lp_build_name(instance_id, "instance_id");
+   lp_build_name(system_values.instance_id, "instance_id");
+
+   if (elts) {
+      fetch_elts   = LLVMGetParam(variant_func, 3);
+      fetch_count  = LLVMGetParam(variant_func, 4);
+      lp_build_name(fetch_elts, "fetch_elts");
+      lp_build_name(fetch_count, "fetch_count");
+      start = count = NULL;
+   }
+   else {
+      start        = LLVMGetParam(variant_func, 3);
+      count        = LLVMGetParam(variant_func, 4);
+      lp_build_name(start, "start");
+      lp_build_name(count, "count");
+      fetch_elts = fetch_count = NULL;
+   }
 
    /*
     * Function body
     */
 
-   block = LLVMAppendBasicBlockInContext(gallivm->context, variant->function_elts, "entry");
+   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
    builder = gallivm->builder;
    LLVMPositionBuilderAtEnd(builder, block);
 
    lp_build_context_init(&bld, gallivm, lp_type_int(32));
 
-   system_values_array = lp_build_system_values_array(gallivm, vs_info,
-                                                      instance_id, NULL);
-
-
-   step = lp_build_const_int32(gallivm, max_vertices);
+   /* function will return non-zero i32 value if any clipped vertices */
+   ret_ptr = lp_build_alloca(gallivm, int32_type, "");
+   LLVMBuildStore(builder, zero, ret_ptr);
 
    /* code generated texture sampling */
    sampler = draw_llvm_sampler_soa_create(
       draw_llvm_variant_key_samplers(&variant->key),
       context_ptr);
 
-   fetch_max = LLVMBuildSub(builder, fetch_count,
-                            lp_build_const_int32(gallivm, 1),
-                            "fetch_max");
+   if (elts) {
+      start = zero;
+      end = fetch_count;
+   }
+   else {
+      end = lp_build_add(&bld, start, count);
+   }
 
-   /* function returns non-zero i32 value if any clipped vertices */
-   ret_ptr = lp_build_alloca(gallivm, int32_type, ""); 
-   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 0), ret_ptr);
+   step = lp_build_const_int32(gallivm, max_vertices);
 
-   lp_build_loop_begin(&lp_loop, gallivm, lp_build_const_int32(gallivm, 0));
+   fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
+
+   lp_build_loop_begin(&lp_loop, gallivm, start);
    {
-      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
-      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = { { 0 } };
       LLVMValueRef io;
       LLVMValueRef clipmask;   /* holds the clipmask value */
-      const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+      const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
+
+      if (elts)
+         io_itr = lp_loop.counter;
+      else
+         io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
 
-      io_itr = lp_loop.counter;
       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
 #if DEBUG_STORE
       lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
                       io_itr, io, lp_loop.counter);
 #endif
-      for (i = 0; i < NUM_CHANNELS; ++i) {
-         LLVMValueRef true_index = LLVMBuildAdd(
-            builder,
-            lp_loop.counter,
-            lp_build_const_int32(gallivm, i), "");
-         LLVMValueRef fetch_ptr;
+      system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32));
+      for (i = 0; i < TGSI_NUM_CHANNELS; ++i) {
+         LLVMValueRef true_index =
+            LLVMBuildAdd(builder,
+                         lp_loop.counter,
+                         lp_build_const_int32(gallivm, i), "");
 
          /* make sure we're not out of bounds which can happen
           * if fetch_count % 4 != 0, because on the last iteration
           * a few of the 4 vertex fetches will be out of bounds */
          true_index = lp_build_min(&bld, true_index, fetch_max);
 
-         fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
-                                  &true_index, 1, "");
-         true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+         if (elts) {
+            LLVMValueRef fetch_ptr;
+            fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+                                     &true_index, 1, "");
+            true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
+         }
+         
+         system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder,
+                                                          system_values.vertex_id, true_index,
+                                                          lp_build_const_int32(gallivm, i), "");
          for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
             struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
-            LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index);
-            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
-                                           &vb_index, 1, "");
+            LLVMValueRef vb_index =
+               lp_build_const_int32(gallivm, velem->vertex_buffer_index);
+            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
             generate_fetch(gallivm, vbuffers_ptr,
                            &aos_attribs[j][i], velem, vb, true_index,
-                           instance_id);
+                           system_values.instance_id);
          }
       }
       convert_to_soa(gallivm, aos_attribs, inputs,
                      draw->pt.nr_vertex_elements);
 
-      ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
+      ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
       generate_vs(llvm,
                   builder,
                   outputs,
                   ptr_aos,
-                  system_values_array,
+                  &system_values,
                   context_ptr,
                   sampler,
                   variant->key.clamp_vertex_color);
 
       /* store original positions in clip before further manipulation */
-      store_clip(gallivm, io, outputs);
+      store_clip(gallivm, io, outputs, 0, cv);
+      store_clip(gallivm, io, outputs, 1, pos);
 
       /* do cliptest */
       if (enable_cliptest) {
          /* allocate clipmask, assign it integer type */
-         clipmask = generate_clipmask(gallivm, outputs,
+         clipmask = generate_clipmask(llvm, outputs,
                                       variant->key.clip_xy,
                                       variant->key.clip_z, 
                                       variant->key.clip_user,
                                       variant->key.clip_halfz,
-                                      variant->key.nr_planes,
-                                      context_ptr);
+                                      variant->key.ucp_enable,
+                                      context_ptr, &have_clipdist);
          /* return clipping boolean value for function */
          clipmask_bool(gallivm, clipmask, ret_ptr);
       }
@@ -1522,40 +1456,43 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
        * and transformed positions in data 
        */   
       convert_to_aos(gallivm, io, outputs, clipmask,
-                     vs_info->num_outputs, max_vertices);
+                     vs_info->num_outputs, max_vertices, have_clipdist);
    }
 
-   lp_build_loop_end_cond(&lp_loop, fetch_count, step, LLVMIntUGE);
+   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
 
    sampler->destroy(sampler);
 
-   ret = LLVMBuildLoad(builder, ret_ptr,"");   
+   ret = LLVMBuildLoad(builder, ret_ptr, "");
    LLVMBuildRet(builder, ret);
-   
+
    /*
     * Translate the LLVM IR into machine code.
     */
 #ifdef DEBUG
-   if (LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
-      lp_debug_dump_value(variant->function_elts);
+   if (LLVMVerifyFunction(variant_func, LLVMPrintMessageAction)) {
+      lp_debug_dump_value(variant_func);
       assert(0);
    }
 #endif
 
-   LLVMRunFunctionPassManager(gallivm->passmgr, variant->function_elts);
+   LLVMRunFunctionPassManager(gallivm->passmgr, variant_func);
 
    if (gallivm_debug & GALLIVM_DEBUG_IR) {
-      lp_debug_dump_value(variant->function_elts);
+      lp_debug_dump_value(variant_func);
       debug_printf("\n");
    }
 
-   code = LLVMGetPointerToGlobal(gallivm->engine, variant->function_elts);
-   variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
+   code = LLVMGetPointerToGlobal(gallivm->engine, variant_func);
+   if (elts)
+      variant->jit_func_elts = (draw_jit_vert_func_elts) pointer_to_func(code);
+   else
+      variant->jit_func = (draw_jit_vert_func) pointer_to_func(code);
 
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
       lp_disassemble(code);
    }
-   lp_func_delete_body(variant->function_elts);
+   lp_func_delete_body(variant_func);
 }
 
 
@@ -1582,7 +1519,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
    key->bypass_viewport = llvm->draw->identity_viewport;
    key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
-   key->nr_planes = llvm->draw->nr_planes;
+   key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
    key->pad = 0;
 
    /* All variants of this shader will have the same value for
@@ -1651,7 +1588,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw)
          jit_tex->min_lod = draw->samplers[i]->min_lod;
          jit_tex->max_lod = draw->samplers[i]->max_lod;
          jit_tex->lod_bias = draw->samplers[i]->lod_bias;
-         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color.f);
       }
    }
 }