tgsi: add info about MSAA samplers to tgsi_shader_info
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
index 4a71955f56abf5518ab668b55a25c3d0a2d0004f..84e33926a2bcb408950f3a469c37e44f065c9f6d 100644 (file)
@@ -32,6 +32,7 @@
 #include "draw_gs.h"
 
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_arit_overflow.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_swizzle.h"
@@ -673,6 +674,7 @@ generate_vs(struct draw_llvm_variant *variant,
 
 static void
 generate_fetch(struct gallivm_state *gallivm,
+               struct draw_context *draw,
                LLVMValueRef vbuffers_ptr,
                LLVMValueRef *res,
                struct pipe_vertex_element *velem,
@@ -695,35 +697,57 @@ generate_fetch(struct gallivm_state *gallivm,
    LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
    LLVMValueRef stride;
    LLVMValueRef buffer_overflowed;
+   LLVMValueRef needed_buffer_size;
    LLVMValueRef temp_ptr =
       lp_build_alloca(gallivm,
                       lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
+   LLVMValueRef ofbit = NULL;
    struct lp_build_if_state if_ctx;
 
    if (velem->instance_divisor) {
-      /* array index = instance_id / instance_divisor */
-      index = LLVMBuildUDiv(builder, instance_id,
-                            lp_build_const_int32(gallivm, velem->instance_divisor),
-                            "instance_divisor");
+      /* Index is equal to the start instance plus the number of current 
+       * instance divided by the divisor. In this case we compute it as:
+       * index = start_instance + (instance_id  / divisor)
+       */
+      LLVMValueRef current_instance;
+      index = lp_build_const_int32(gallivm, draw->start_instance);
+      current_instance = LLVMBuildUDiv(builder, instance_id,
+                                       lp_build_const_int32(gallivm, velem->instance_divisor),
+                                       "instance_divisor");
+      index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
    }
 
-   stride = LLVMBuildMul(builder, vb_stride, index, "");
-
-   stride = LLVMBuildAdd(builder, stride,
-                         vb_buffer_offset,
-                         "");
-   stride = LLVMBuildAdd(builder, stride,
-                         lp_build_const_int32(gallivm, velem->src_offset),
-                         "");
-
-   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
-                                     stride, buffer_size,
+   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+   stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
+   stride = lp_build_uadd_overflow(
+      gallivm, stride,
+      lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
+   needed_buffer_size = lp_build_uadd_overflow(
+      gallivm, stride,
+      lp_build_const_int32(gallivm,
+                           util_format_get_blocksize(velem->src_format)),
+      &ofbit);
+
+   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+                                     needed_buffer_size, buffer_size,
                                      "buffer_overflowed");
-   /*
-   lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);
-   lp_build_print_value(gallivm, "   buffer size = ", buffer_size);
+   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
+#if 0
+   lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
+                   index, vb_stride);
+   lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
+                   vb_buffer_offset,
+                   lp_build_const_int32(gallivm, velem->src_offset));
+   lp_build_print_value(gallivm, "   blocksize = ",
+                        lp_build_const_int32(
+                           gallivm,
+                           util_format_get_blocksize(velem->src_format)));
+   lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
+   lp_build_printf(gallivm, "   stride = %u\n", stride);
+   lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
+   lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
    lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
-   */
+#endif
 
    lp_build_if(&if_ctx, gallivm, buffer_overflowed);
    {
@@ -953,6 +977,12 @@ convert_to_aos(struct gallivm_state *gallivm,
                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                          chan, 0));
             lp_build_print_value(gallivm, "val = ", out);
+            {
+               LLVMValueRef iv =
+                  LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
+               
+               lp_build_print_value(gallivm, "  ival = ", iv);
+            }
 #endif
             soa[chan] = out;
          }
@@ -1146,6 +1176,11 @@ generate_clipmask(struct draw_llvm *llvm,
    if (cd[0] != pos || cd[1] != pos)
       have_cd = true;
 
+   if (num_written_clipdistance && !clip_user) {
+      clip_user = true;
+      ucp_enable = (1 << num_written_clipdistance) - 1;
+   }
+
    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
@@ -1457,7 +1492,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMContextRef context = gallivm->context;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[9];
+   LLVMTypeRef arg_types[10];
    unsigned num_arg_types =
       elts ? Elements(arg_types) : Elements(arg_types) - 1;
    LLVMTypeRef func_type;
@@ -1467,6 +1502,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    struct lp_type vs_type;
    LLVMValueRef end, start;
    LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
+   LLVMValueRef vertex_id_offset;
    LLVMValueRef stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
@@ -1512,6 +1548,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    arg_types[i++] = int32_type;                     /* stride */
    arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
    arg_types[i++] = int32_type;                     /* instance_id */
+   arg_types[i++] = int32_type;                     /* vertex_id_offset */
 
    func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
 
@@ -1536,6 +1573,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
    vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
    system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+   vertex_id_offset          = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
@@ -1543,6 +1581,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
    lp_build_name(system_values.instance_id, "instance_id");
+   lp_build_name(vertex_id_offset, "vertex_id_offset");
 
    if (elts) {
       fetch_elts    = LLVMGetParam(variant_func, 3);
@@ -1590,6 +1629,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    if (elts) {
       start = zero;
       end = fetch_count;
+      count = fetch_count;
    }
    else {
       end = lp_build_add(&bld, start, count);
@@ -1599,7 +1639,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
 
    fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
 
-   lp_build_loop_begin(&lp_loop, gallivm, start);
+   lp_build_loop_begin(&lp_loop, gallivm, zero);
    {
       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
       LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
@@ -1607,10 +1647,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
       LLVMValueRef clipmask;   /* holds the clipmask value */
       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
 
-      if (elts)
-         io_itr = lp_loop.counter;
-      else
-         io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
+      io_itr = lp_loop.counter;
 
       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
 #if DEBUG_STORE
@@ -1619,21 +1656,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
 #endif
       system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
       for (i = 0; i < vector_length; ++i) {
-         LLVMValueRef true_index =
+         LLVMValueRef vert_index =
             LLVMBuildAdd(builder,
                          lp_loop.counter,
                          lp_build_const_int32(gallivm, i), "");
+         LLVMValueRef true_index =
+            LLVMBuildAdd(builder, start, vert_index, "");
+         LLVMValueRef vertex_id;
 
          /* make sure we're not out of bounds which can happen
           * if fetch_count % 4 != 0, because on the last iteration
           * a few of the 4 vertex fetches will be out of bounds */
          true_index = lp_build_min(&bld, true_index, fetch_max);
 
-         system_values.vertex_id = LLVMBuildInsertElement(
-            gallivm->builder,
-            system_values.vertex_id, true_index,
-            lp_build_const_int32(gallivm, i), "");
-
          if (elts) {
             LLVMValueRef fetch_ptr;
             LLVMValueRef index_overflowed;
@@ -1642,10 +1677,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                   gallivm,
                   lp_build_vec_type(gallivm, lp_type_int(32)), "");
             struct lp_build_if_state if_ctx;
-            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                              true_index, fetch_elt_max,
                                              "index_overflowed");
-            
+
             lp_build_if(&if_ctx, gallivm, index_overflowed);
             {
                /* Generate maximum possible index so that
@@ -1670,13 +1705,30 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
             lp_build_endif(&if_ctx);
             true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
          }
+         /* in the paths with elts vertex id has to be unaffected by the
+          * index bias and because indices inside our elements array have
+          * already had index bias applied we need to subtract it here to
+          * get back to the original index.
+          * in the linear paths vertex id has to be unaffected by the
+          * original start index and because we abuse the 'start' variable
+          * to either represent the actual start index or the index at which
+          * the primitive was split (we split rendering into chunks of at
+          * most 4095-vertices) we need to back out the original start
+          * index out of our vertex id here.
+          */
+         vertex_id = LLVMBuildSub(builder, true_index, vertex_id_offset, "");
+
+         system_values.vertex_id = LLVMBuildInsertElement(
+            gallivm->builder,
+            system_values.vertex_id, vertex_id,
+            lp_build_const_int32(gallivm, i), "");
 
          for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
             struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
             LLVMValueRef vb_index =
                lp_build_const_int32(gallivm, velem->vertex_buffer_index);
             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
-            generate_fetch(gallivm, vbuffers_ptr,
+            generate_fetch(gallivm, draw, vbuffers_ptr,
                            &aos_attribs[j][i], velem, vb, true_index,
                            system_values.instance_id);
          }
@@ -1739,8 +1791,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                      vs_info->num_outputs, vs_type,
                      have_clipdist);
    }
-
-   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
+   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
 
    sampler->destroy(sampler);
 
@@ -1782,6 +1833,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
+   key->num_outputs = draw_total_vs_outputs(llvm->draw);
    key->pad1 = 0;
 
    /* All variants of this shader will have the same value for
@@ -1988,31 +2040,19 @@ generate_mask_value(struct draw_gs_llvm_variant *variant,
 {
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef bits[16];
-   struct lp_type  mask_type = lp_int_type(gs_type);
-   struct lp_type mask_elem_type = lp_elem_type(mask_type);
-   LLVMValueRef mask_val = lp_build_const_vec(gallivm,
-                                              mask_type,
-                                              0);
+   struct lp_type mask_type = lp_int_type(gs_type);
+   LLVMValueRef num_prims;
+   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
    unsigned i;
 
-   assert(gs_type.length <= Elements(bits));
-
-   for (i = gs_type.length; i >= 1; --i) {
-      int idx = i - 1;
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      bits[idx] = lp_build_compare(gallivm,
-                                   mask_elem_type, PIPE_FUNC_GEQUAL,
-                                   variant->num_prims, ind);
+   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
+                                  variant->num_prims);
+   for (i = 0; i <= gs_type.length; i++) {
+      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
    }
-   for (i = 0; i < gs_type.length; ++i) {
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
-   }
-   mask_val = lp_build_compare(gallivm,
-                               mask_type, PIPE_FUNC_NOTEQUAL,
-                               mask_val,
-                               lp_build_const_int_vec(gallivm, mask_type, 0));
+   mask_val = lp_build_compare(gallivm, mask_type,
+                               PIPE_FUNC_GREATER, num_prims, mask_val);
 
    return mask_val;
 }
@@ -2125,6 +2165,11 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
    }
 
+   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+      tgsi_dump(tokens, 0);
+      draw_gs_llvm_dump_variant_key(&variant->key);
+   }
+
    lp_build_tgsi_soa(variant->gallivm,
                      tokens,
                      gs_type,
@@ -2219,6 +2264,8 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
 
    key = (struct draw_gs_llvm_variant_key *)store;
 
+   key->num_outputs = draw_total_gs_outputs(llvm->draw);
+
    /* All variants of this shader will have the same value for
     * nr_samplers.  Not yet trying to compact away holes in the
     * sampler array.