tgsi: add info about MSAA samplers to tgsi_shader_info
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
index f27776a5cc539b6cea6f6fe7a901c3a922d39889..84e33926a2bcb408950f3a469c37e44f065c9f6d 100644 (file)
@@ -32,6 +32,7 @@
 #include "draw_gs.h"
 
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_arit_overflow.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_swizzle.h"
@@ -673,6 +674,7 @@ generate_vs(struct draw_llvm_variant *variant,
 
 static void
 generate_fetch(struct gallivm_state *gallivm,
+               struct draw_context *draw,
                LLVMValueRef vbuffers_ptr,
                LLVMValueRef *res,
                struct pipe_vertex_element *velem,
@@ -699,32 +701,37 @@ generate_fetch(struct gallivm_state *gallivm,
    LLVMValueRef temp_ptr =
       lp_build_alloca(gallivm,
                       lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
+   LLVMValueRef ofbit = NULL;
    struct lp_build_if_state if_ctx;
 
    if (velem->instance_divisor) {
-      /* array index = instance_id / instance_divisor */
-      index = LLVMBuildUDiv(builder, instance_id,
-                            lp_build_const_int32(gallivm, velem->instance_divisor),
-                            "instance_divisor");
+      /* Index is equal to the start instance plus the number of current 
+       * instance divided by the divisor. In this case we compute it as:
+       * index = start_instance + (instance_id  / divisor)
+       */
+      LLVMValueRef current_instance;
+      index = lp_build_const_int32(gallivm, draw->start_instance);
+      current_instance = LLVMBuildUDiv(builder, instance_id,
+                                       lp_build_const_int32(gallivm, velem->instance_divisor),
+                                       "instance_divisor");
+      index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
    }
 
-   stride = LLVMBuildMul(builder, vb_stride, index, "");
-
-   stride = LLVMBuildAdd(builder, stride,
-                         vb_buffer_offset,
-                         "");
-   stride = LLVMBuildAdd(builder, stride,
-                         lp_build_const_int32(gallivm, velem->src_offset),
-                         "");
-   needed_buffer_size = LLVMBuildAdd(
-      builder, stride,
+   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+   stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
+   stride = lp_build_uadd_overflow(
+      gallivm, stride,
+      lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
+   needed_buffer_size = lp_build_uadd_overflow(
+      gallivm, stride,
       lp_build_const_int32(gallivm,
                            util_format_get_blocksize(velem->src_format)),
-      "");
+      &ofbit);
 
    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                      needed_buffer_size, buffer_size,
                                      "buffer_overflowed");
+   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
 #if 0
    lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
                    index, vb_stride);
@@ -735,6 +742,7 @@ generate_fetch(struct gallivm_state *gallivm,
                         lp_build_const_int32(
                            gallivm,
                            util_format_get_blocksize(velem->src_format)));
+   lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
    lp_build_printf(gallivm, "   stride = %u\n", stride);
    lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
    lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
@@ -969,6 +977,12 @@ convert_to_aos(struct gallivm_state *gallivm,
                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                          chan, 0));
             lp_build_print_value(gallivm, "val = ", out);
+            {
+               LLVMValueRef iv =
+                  LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
+               
+               lp_build_print_value(gallivm, "  ival = ", iv);
+            }
 #endif
             soa[chan] = out;
          }
@@ -1478,7 +1492,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMContextRef context = gallivm->context;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[9];
+   LLVMTypeRef arg_types[10];
    unsigned num_arg_types =
       elts ? Elements(arg_types) : Elements(arg_types) - 1;
    LLVMTypeRef func_type;
@@ -1488,6 +1502,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    struct lp_type vs_type;
    LLVMValueRef end, start;
    LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
+   LLVMValueRef vertex_id_offset;
    LLVMValueRef stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
@@ -1533,6 +1548,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    arg_types[i++] = int32_type;                     /* stride */
    arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
    arg_types[i++] = int32_type;                     /* instance_id */
+   arg_types[i++] = int32_type;                     /* vertex_id_offset */
 
    func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
 
@@ -1557,6 +1573,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
    vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
    system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+   vertex_id_offset          = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
@@ -1564,6 +1581,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
    lp_build_name(system_values.instance_id, "instance_id");
+   lp_build_name(vertex_id_offset, "vertex_id_offset");
 
    if (elts) {
       fetch_elts    = LLVMGetParam(variant_func, 3);
@@ -1638,22 +1656,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
 #endif
       system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
       for (i = 0; i < vector_length; ++i) {
-         LLVMValueRef true_index =
+         LLVMValueRef vert_index =
             LLVMBuildAdd(builder,
                          lp_loop.counter,
                          lp_build_const_int32(gallivm, i), "");
-         true_index = LLVMBuildAdd(builder, start, true_index, "");
+         LLVMValueRef true_index =
+            LLVMBuildAdd(builder, start, vert_index, "");
+         LLVMValueRef vertex_id;
 
          /* make sure we're not out of bounds which can happen
           * if fetch_count % 4 != 0, because on the last iteration
           * a few of the 4 vertex fetches will be out of bounds */
          true_index = lp_build_min(&bld, true_index, fetch_max);
 
-         system_values.vertex_id = LLVMBuildInsertElement(
-            gallivm->builder,
-            system_values.vertex_id, true_index,
-            lp_build_const_int32(gallivm, i), "");
-
          if (elts) {
             LLVMValueRef fetch_ptr;
             LLVMValueRef index_overflowed;
@@ -1662,10 +1677,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                   gallivm,
                   lp_build_vec_type(gallivm, lp_type_int(32)), "");
             struct lp_build_if_state if_ctx;
-            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                              true_index, fetch_elt_max,
                                              "index_overflowed");
-            
+
             lp_build_if(&if_ctx, gallivm, index_overflowed);
             {
                /* Generate maximum possible index so that
@@ -1690,13 +1705,30 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
             lp_build_endif(&if_ctx);
             true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
          }
+         /* in the paths with elts vertex id has to be unaffected by the
+          * index bias and because indices inside our elements array have
+          * already had index bias applied we need to subtract it here to
+          * get back to the original index.
+          * in the linear paths vertex id has to be unaffected by the
+          * original start index and because we abuse the 'start' variable
+          * to either represent the actual start index or the index at which
+          * the primitive was split (we split rendering into chunks of at
+          * most 4095-vertices) we need to back out the original start
+          * index out of our vertex id here.
+          */
+         vertex_id = LLVMBuildSub(builder, true_index, vertex_id_offset, "");
+
+         system_values.vertex_id = LLVMBuildInsertElement(
+            gallivm->builder,
+            system_values.vertex_id, vertex_id,
+            lp_build_const_int32(gallivm, i), "");
 
          for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
             struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
             LLVMValueRef vb_index =
                lp_build_const_int32(gallivm, velem->vertex_buffer_index);
             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
-            generate_fetch(gallivm, vbuffers_ptr,
+            generate_fetch(gallivm, draw, vbuffers_ptr,
                            &aos_attribs[j][i], velem, vb, true_index,
                            system_values.instance_id);
          }
@@ -1801,6 +1833,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
+   key->num_outputs = draw_total_vs_outputs(llvm->draw);
    key->pad1 = 0;
 
    /* All variants of this shader will have the same value for
@@ -2007,31 +2040,19 @@ generate_mask_value(struct draw_gs_llvm_variant *variant,
 {
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef bits[16];
-   struct lp_type  mask_type = lp_int_type(gs_type);
-   struct lp_type mask_elem_type = lp_elem_type(mask_type);
-   LLVMValueRef mask_val = lp_build_const_vec(gallivm,
-                                              mask_type,
-                                              0);
+   struct lp_type mask_type = lp_int_type(gs_type);
+   LLVMValueRef num_prims;
+   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
    unsigned i;
 
-   assert(gs_type.length <= Elements(bits));
-
-   for (i = gs_type.length; i >= 1; --i) {
-      int idx = i - 1;
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      bits[idx] = lp_build_compare(gallivm,
-                                   mask_elem_type, PIPE_FUNC_GEQUAL,
-                                   variant->num_prims, ind);
-   }
-   for (i = 0; i < gs_type.length; ++i) {
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
+   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
+                                  variant->num_prims);
+   for (i = 0; i <= gs_type.length; i++) {
+      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
    }
-   mask_val = lp_build_compare(gallivm,
-                               mask_type, PIPE_FUNC_NOTEQUAL,
-                               mask_val,
-                               lp_build_const_int_vec(gallivm, mask_type, 0));
+   mask_val = lp_build_compare(gallivm, mask_type,
+                               PIPE_FUNC_GREATER, num_prims, mask_val);
 
    return mask_val;
 }
@@ -2144,6 +2165,11 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
    }
 
+   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+      tgsi_dump(tokens, 0);
+      draw_gs_llvm_dump_variant_key(&variant->key);
+   }
+
    lp_build_tgsi_soa(variant->gallivm,
                      tokens,
                      gs_type,
@@ -2238,6 +2264,8 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
 
    key = (struct draw_gs_llvm_variant_key *)store;
 
+   key->num_outputs = draw_total_gs_outputs(llvm->draw);
+
    /* All variants of this shader will have the same value for
     * nr_samplers.  Not yet trying to compact away holes in the
     * sampler array.