- for (i = 0; i < vector_length; ++i) {
- LLVMValueRef vert_index =
- LLVMBuildAdd(builder,
- lp_loop.counter,
- lp_build_const_int32(gallivm, i), "");
- LLVMValueRef true_index =
- LLVMBuildAdd(builder, start, vert_index, "");
-
- /* make sure we're not out of bounds which can happen
- * if fetch_count % 4 != 0, because on the last iteration
- * a few of the 4 vertex fetches will be out of bounds */
- true_index = lp_build_min(&bld, true_index, fetch_max);
-
- if (elts) {
- LLVMValueRef fetch_ptr;
- LLVMValueRef index_overflowed;
- LLVMValueRef index_ptr =
- lp_build_alloca(
- gallivm,
- lp_build_vec_type(gallivm, lp_type_int(32)), "");
- struct lp_build_if_state if_ctx;
- index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
- true_index, fetch_elt_max,
- "index_overflowed");
-
- lp_build_if(&if_ctx, gallivm, index_overflowed);
- {
- /* Generate maximum possible index so that
- * generate_fetch can treat it just like
- * any other overflow and return zeros.
- * We don't have to worry about the restart
- * primitive index because it has already been
- * handled
- */
- LLVMValueRef val =
- lp_build_const_int32(gallivm, 0xffffffff);
- LLVMBuildStore(builder, val, index_ptr);
- }
- lp_build_else(&if_ctx);
- {
- LLVMValueRef val;
- fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
- &true_index, 1, "");
- val = LLVMBuildLoad(builder, fetch_ptr, "");
- LLVMBuildStore(builder, val, index_ptr);
- }
- lp_build_endif(&if_ctx);
- true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
- }
- true_indices[i] = true_index;
- true_index_array = LLVMBuildInsertElement(
- gallivm->builder, true_index_array, true_index,
- lp_build_const_int32(gallivm, i), "");
+ true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
+ true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
+
+ LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
+ /*
+ * Limit indices to fetch_max, otherwise might try to access indices
+ * beyond index buffer (or rather vsplit elt buffer) size.
+ * Could probably safely (?) skip this for non-indexed draws and
+ * simplify things minimally (by removing it could combine the ind_vec
+ * and start_vec adds). I think the only effect for non-indexed draws will
+ * be that for the invalid elements they will be all fetched from the
+ * same location as the last valid one, but no one should really care.
+ */
+ true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
+
+ index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
+
+ lp_build_if(&if_ctx, gallivm, have_elts);
+ {
+ /*
+ * Note: you'd expect some comparison/clamp against fetch_elt_max
+ * here.
+ * There used to be one here but it was incorrect: overflow was
+ * detected if index > fetch_elt_max - but the correct condition
+ * would be index >= fetch_elt_max (since this is just size of elts
+ * buffer / element size).
+ * Using the correct condition however will cause failures - due to
+ * vsplit/vcache code which rebases indices. So, as an example, if
+ * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
+ * replace all invalid indices with 0 - which in case of elt_bias
+ * not being zero will get a different fetch index than the valid
+ * index 0. So, just rely on vsplit code preventing out-of-bounds
+ * fetches. This is also why it's safe to do elts fetch even if there
+ * was no index buffer bound - the real buffer is never seen here, at
+ * least not if there are index buffer overflows...
+ */
+
+ /*
+ * XXX should not have to do this, as scale can be handled
+ * natively by loads (hits asserts though).
+ */
+ tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
+ fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
+ LLVMPointerType(LLVMInt8TypeInContext(context),
+ 0), "");
+ tmp = lp_build_gather(gallivm, vs_type.length,
+ 32, bld.type, TRUE,
+ fetch_elts, tmp, FALSE);
+ LLVMBuildStore(builder, tmp, index_store);