tgsi: add info about MSAA samplers to tgsi_shader_info

[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c

index 4a71955f56abf5518ab668b55a25c3d0a2d0004f..84e33926a2bcb408950f3a469c37e44f065c9f6d 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -32,6 +32,7 @@
  #include "draw_gs.h"
  
  #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_arit_overflow.h"
  #include "gallivm/lp_bld_logic.h"
  #include "gallivm/lp_bld_const.h"
  #include "gallivm/lp_bld_swizzle.h"
@@ -673,6 +674,7 @@ generate_vs(struct draw_llvm_variant *variant,
  
  static void
  generate_fetch(struct gallivm_state *gallivm,
+               struct draw_context *draw,
                 LLVMValueRef vbuffers_ptr,
                 LLVMValueRef *res,
                 struct pipe_vertex_element *velem,
@@ -695,35 +697,57 @@ generate_fetch(struct gallivm_state *gallivm,
     LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
     LLVMValueRef stride;
     LLVMValueRef buffer_overflowed;
+   LLVMValueRef needed_buffer_size;
     LLVMValueRef temp_ptr =
        lp_build_alloca(gallivm,
                        lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
+   LLVMValueRef ofbit = NULL;
     struct lp_build_if_state if_ctx;
  
     if (velem->instance_divisor) {
-      /* array index = instance_id / instance_divisor */
-      index = LLVMBuildUDiv(builder, instance_id,
-                            lp_build_const_int32(gallivm, velem->instance_divisor),
-                            "instance_divisor");
+      /* The fetch index is the start instance plus the current instance
+       * id divided by the instance divisor, i.e.:
+       * index = start_instance + (instance_id / divisor)
+       */
+      LLVMValueRef current_instance;
+      index = lp_build_const_int32(gallivm, draw->start_instance);
+      current_instance = LLVMBuildUDiv(builder, instance_id,
+                                       lp_build_const_int32(gallivm, velem->instance_divisor),
+                                       "instance_divisor");
+      index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
     }
  
-   stride = LLVMBuildMul(builder, vb_stride, index, "");
-
-   stride = LLVMBuildAdd(builder, stride,
-                         vb_buffer_offset,
-                         "");
-   stride = LLVMBuildAdd(builder, stride,
-                         lp_build_const_int32(gallivm, velem->src_offset),
-                         "");
-
-   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
-                                     stride, buffer_size,
+   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+   stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
+   stride = lp_build_uadd_overflow(
+      gallivm, stride,
+      lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
+   needed_buffer_size = lp_build_uadd_overflow(
+      gallivm, stride,
+      lp_build_const_int32(gallivm,
+                           util_format_get_blocksize(velem->src_format)),
+      &ofbit);
+
+   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+                                     needed_buffer_size, buffer_size,
                                       "buffer_overflowed");
-   /*
-   lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);
-   lp_build_print_value(gallivm, "   buffer size = ", buffer_size);
+   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
+#if 0
+   lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
+                   index, vb_stride);
+   lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
+                   vb_buffer_offset,
+                   lp_build_const_int32(gallivm, velem->src_offset));
+   lp_build_print_value(gallivm, "   blocksize = ",
+                        lp_build_const_int32(
+                           gallivm,
+                           util_format_get_blocksize(velem->src_format)));
+   lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
+   lp_build_printf(gallivm, "   stride = %u\n", stride);
+   lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
+   lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
     lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
-   */
+#endif
  
     lp_build_if(&if_ctx, gallivm, buffer_overflowed);
     {
@@ -953,6 +977,12 @@ convert_to_aos(struct gallivm_state *gallivm,
                              LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                           chan, 0));
              lp_build_print_value(gallivm, "val = ", out);
+            {
+               LLVMValueRef iv =
+                  LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
+               
+               lp_build_print_value(gallivm, "  ival = ", iv);
+            }
  #endif
              soa[chan] = out;
           }
@@ -1146,6 +1176,11 @@ generate_clipmask(struct draw_llvm *llvm,
     if (cd[0] != pos || cd[1] != pos)
        have_cd = true;
  
+   if (num_written_clipdistance && !clip_user) {
+      clip_user = true;
+      ucp_enable = (1 << num_written_clipdistance) - 1;
+   }
+
     mask = lp_build_const_int_vec(gallivm, i32_type, 0);
     temp = lp_build_const_int_vec(gallivm, i32_type, 0);
     zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
@@ -1457,7 +1492,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     struct gallivm_state *gallivm = variant->gallivm;
     LLVMContextRef context = gallivm->context;
     LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[9];
+   LLVMTypeRef arg_types[10];
     unsigned num_arg_types =
        elts ? Elements(arg_types) : Elements(arg_types) - 1;
     LLVMTypeRef func_type;
@@ -1467,6 +1502,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     struct lp_type vs_type;
     LLVMValueRef end, start;
     LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
+   LLVMValueRef vertex_id_offset;
     LLVMValueRef stride, step, io_itr;
     LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
     LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
@@ -1512,6 +1548,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     arg_types[i++] = int32_type;                     /* stride */
     arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
     arg_types[i++] = int32_type;                     /* instance_id */
+   arg_types[i++] = int32_type;                     /* vertex_id_offset */
  
     func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
  
@@ -1536,6 +1573,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
     vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
     system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+   vertex_id_offset          = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
  
     lp_build_name(context_ptr, "context");
     lp_build_name(io_ptr, "io");
@@ -1543,6 +1581,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     lp_build_name(stride, "stride");
     lp_build_name(vb_ptr, "vb");
     lp_build_name(system_values.instance_id, "instance_id");
+   lp_build_name(vertex_id_offset, "vertex_id_offset");
  
     if (elts) {
        fetch_elts    = LLVMGetParam(variant_func, 3);
@@ -1590,6 +1629,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
     if (elts) {
        start = zero;
        end = fetch_count;
+      count = fetch_count;
     }
     else {
        end = lp_build_add(&bld, start, count);
@@ -1599,7 +1639,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
  
     fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
  
-   lp_build_loop_begin(&lp_loop, gallivm, start);
+   lp_build_loop_begin(&lp_loop, gallivm, zero);
     {
        LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
        LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
@@ -1607,10 +1647,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
        LLVMValueRef clipmask;   /* holds the clipmask value */
        const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
  
-      if (elts)
-         io_itr = lp_loop.counter;
-      else
-         io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
+      io_itr = lp_loop.counter;
  
        io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
  #if DEBUG_STORE
@@ -1619,21 +1656,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
  #endif
        system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
        for (i = 0; i < vector_length; ++i) {
-         LLVMValueRef true_index =
+         LLVMValueRef vert_index =
              LLVMBuildAdd(builder,
                           lp_loop.counter,
                           lp_build_const_int32(gallivm, i), "");
+         LLVMValueRef true_index =
+            LLVMBuildAdd(builder, start, vert_index, "");
+         LLVMValueRef vertex_id;
  
           /* make sure we're not out of bounds which can happen
            * if fetch_count % 4 != 0, because on the last iteration
            * a few of the 4 vertex fetches will be out of bounds */
           true_index = lp_build_min(&bld, true_index, fetch_max);
  
-         system_values.vertex_id = LLVMBuildInsertElement(
-            gallivm->builder,
-            system_values.vertex_id, true_index,
-            lp_build_const_int32(gallivm, i), "");
-
           if (elts) {
              LLVMValueRef fetch_ptr;
              LLVMValueRef index_overflowed;
@@ -1642,10 +1677,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                    gallivm,
                    lp_build_vec_type(gallivm, lp_type_int(32)), "");
              struct lp_build_if_state if_ctx;
-            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                               true_index, fetch_elt_max,
                                               "index_overflowed");
-            
+
              lp_build_if(&if_ctx, gallivm, index_overflowed);
              {
                 /* Generate maximum possible index so that
@@ -1670,13 +1705,30 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
              lp_build_endif(&if_ctx);
              true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
           }
+         /* In the elts paths the vertex id has to be unaffected by the
+          * index bias; because the indices inside our elements array have
+          * already had the index bias applied, we need to subtract it here
+          * to get back to the original index.
+          * In the linear paths the vertex id has to be unaffected by the
+          * original start index; because we abuse the 'start' variable
+          * to represent either the actual start index or the index at which
+          * the primitive was split (we split rendering into chunks of at
+          * most 4095 vertices), we need to subtract the original start
+          * index from our vertex id here.
+          */
+         vertex_id = LLVMBuildSub(builder, true_index, vertex_id_offset, "");
+
+         system_values.vertex_id = LLVMBuildInsertElement(
+            gallivm->builder,
+            system_values.vertex_id, vertex_id,
+            lp_build_const_int32(gallivm, i), "");
  
           for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
              struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
              LLVMValueRef vb_index =
                 lp_build_const_int32(gallivm, velem->vertex_buffer_index);
              LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
-            generate_fetch(gallivm, vbuffers_ptr,
+            generate_fetch(gallivm, draw, vbuffers_ptr,
                             &aos_attribs[j][i], velem, vb, true_index,
                             system_values.instance_id);
           }
@@ -1739,8 +1791,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                       vs_info->num_outputs, vs_type,
                       have_clipdist);
     }
-
-   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
+   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
  
     sampler->destroy(sampler);
  
@@ -1782,6 +1833,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
     key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
     key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
     key->has_gs = llvm->draw->gs.geometry_shader != NULL;
+   key->num_outputs = draw_total_vs_outputs(llvm->draw);
     key->pad1 = 0;
  
     /* All variants of this shader will have the same value for
@@ -1988,31 +2040,19 @@ generate_mask_value(struct draw_gs_llvm_variant *variant,
  {
     struct gallivm_state *gallivm = variant->gallivm;
     LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef bits[16];
-   struct lp_type  mask_type = lp_int_type(gs_type);
-   struct lp_type mask_elem_type = lp_elem_type(mask_type);
-   LLVMValueRef mask_val = lp_build_const_vec(gallivm,
-                                              mask_type,
-                                              0);
+   struct lp_type mask_type = lp_int_type(gs_type);
+   LLVMValueRef num_prims;
+   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
     unsigned i;
  
-   assert(gs_type.length <= Elements(bits));
-
-   for (i = gs_type.length; i >= 1; --i) {
-      int idx = i - 1;
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      bits[idx] = lp_build_compare(gallivm,
-                                   mask_elem_type, PIPE_FUNC_GEQUAL,
-                                   variant->num_prims, ind);
+   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
+                                  variant->num_prims);
+   for (i = 0; i < gs_type.length; i++) { /* lane i holds i; valid lanes are 0..length-1 */
+      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
     }
-   for (i = 0; i < gs_type.length; ++i) {
-      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
-      mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
-   }
-   mask_val = lp_build_compare(gallivm,
-                               mask_type, PIPE_FUNC_NOTEQUAL,
-                               mask_val,
-                               lp_build_const_int_vec(gallivm, mask_type, 0));
+   mask_val = lp_build_compare(gallivm, mask_type,
+                               PIPE_FUNC_GREATER, num_prims, mask_val);
  
     return mask_val;
  }
@@ -2125,6 +2165,11 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
        system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
     }
  
+   if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+      tgsi_dump(tokens, 0);
+      draw_gs_llvm_dump_variant_key(&variant->key);
+   }
+
     lp_build_tgsi_soa(variant->gallivm,
                       tokens,
                       gs_type,
@@ -2219,6 +2264,8 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
  
     key = (struct draw_gs_llvm_variant_key *)store;
  
+   key->num_outputs = draw_total_gs_outputs(llvm->draw);
+
     /* All variants of this shader will have the same value for
      * nr_samplers.  Not yet trying to compact away holes in the
      * sampler array.