#include "draw_gs.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_arit_overflow.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_swizzle.h"
static void
generate_fetch(struct gallivm_state *gallivm,
+ struct draw_context *draw,
LLVMValueRef vbuffers_ptr,
LLVMValueRef *res,
struct pipe_vertex_element *velem,
LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
LLVMValueRef stride;
LLVMValueRef buffer_overflowed;
+ LLVMValueRef needed_buffer_size;
LLVMValueRef temp_ptr =
lp_build_alloca(gallivm,
lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
+ LLVMValueRef ofbit = NULL;
struct lp_build_if_state if_ctx;
if (velem->instance_divisor) {
- /* array index = instance_id / instance_divisor */
- index = LLVMBuildUDiv(builder, instance_id,
- lp_build_const_int32(gallivm, velem->instance_divisor),
- "instance_divisor");
+ /* Index is equal to the start instance plus the number of current
+ * instance divided by the divisor. In this case we compute it as:
+ * index = start_instance + (instance_id / divisor)
+ */
+ LLVMValueRef current_instance;
+ index = lp_build_const_int32(gallivm, draw->start_instance);
+ current_instance = LLVMBuildUDiv(builder, instance_id,
+ lp_build_const_int32(gallivm, velem->instance_divisor),
+ "instance_divisor");
+ index = lp_build_uadd_overflow(gallivm, index, current_instance, &ofbit);
}
- stride = LLVMBuildMul(builder, vb_stride, index, "");
-
- stride = LLVMBuildAdd(builder, stride,
- vb_buffer_offset,
- "");
- stride = LLVMBuildAdd(builder, stride,
- lp_build_const_int32(gallivm, velem->src_offset),
- "");
-
- buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
- stride, buffer_size,
+ stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+ stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
+ stride = lp_build_uadd_overflow(
+ gallivm, stride,
+ lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
+ needed_buffer_size = lp_build_uadd_overflow(
+ gallivm, stride,
+ lp_build_const_int32(gallivm,
+ util_format_get_blocksize(velem->src_format)),
+ &ofbit);
+
+ buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+ needed_buffer_size, buffer_size,
"buffer_overflowed");
- /*
- lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);
- lp_build_print_value(gallivm, " buffer size = ", buffer_size);
+ buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
+#if 0
+ lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
+ index, vb_stride);
+ lp_build_printf(gallivm, " vb_buffer_offset = %u, src_offset is %u\n",
+ vb_buffer_offset,
+ lp_build_const_int32(gallivm, velem->src_offset));
+ lp_build_print_value(gallivm, " blocksize = ",
+ lp_build_const_int32(
+ gallivm,
+ util_format_get_blocksize(velem->src_format)));
+ lp_build_printf(gallivm, " instance_id = %u\n", instance_id);
+ lp_build_printf(gallivm, " stride = %u\n", stride);
+ lp_build_printf(gallivm, " buffer size = %u\n", buffer_size);
+ lp_build_printf(gallivm, " needed_buffer_size = %u\n", needed_buffer_size);
lp_build_print_value(gallivm, " buffer overflowed = ", buffer_overflowed);
- */
+#endif
lp_build_if(&if_ctx, gallivm, buffer_overflowed);
{
LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
chan, 0));
lp_build_print_value(gallivm, "val = ", out);
+ {
+ LLVMValueRef iv =
+ LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
+
+ lp_build_print_value(gallivm, " ival = ", iv);
+ }
#endif
soa[chan] = out;
}
if (cd[0] != pos || cd[1] != pos)
have_cd = true;
+ if (num_written_clipdistance && !clip_user) {
+ clip_user = true;
+ ucp_enable = (1 << num_written_clipdistance) - 1;
+ }
+
mask = lp_build_const_int_vec(gallivm, i32_type, 0);
temp = lp_build_const_int_vec(gallivm, i32_type, 0);
zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */
struct gallivm_state *gallivm = variant->gallivm;
LLVMContextRef context = gallivm->context;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
- LLVMTypeRef arg_types[9];
+ LLVMTypeRef arg_types[10];
unsigned num_arg_types =
elts ? Elements(arg_types) : Elements(arg_types) - 1;
LLVMTypeRef func_type;
struct lp_type vs_type;
LLVMValueRef end, start;
LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
+ LLVMValueRef vertex_id_offset;
LLVMValueRef stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
arg_types[i++] = int32_type; /* stride */
arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */
arg_types[i++] = int32_type; /* instance_id */
+ arg_types[i++] = int32_type; /* vertex_id_offset */
func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
stride = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
vb_ptr = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+ vertex_id_offset = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
lp_build_name(system_values.instance_id, "instance_id");
+ lp_build_name(vertex_id_offset, "vertex_id_offset");
if (elts) {
fetch_elts = LLVMGetParam(variant_func, 3);
if (elts) {
start = zero;
end = fetch_count;
+ count = fetch_count;
}
else {
end = lp_build_add(&bld, start, count);
fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
- lp_build_loop_begin(&lp_loop, gallivm, start);
+ lp_build_loop_begin(&lp_loop, gallivm, zero);
{
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
LLVMValueRef clipmask; /* holds the clipmask value */
const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
- if (elts)
- io_itr = lp_loop.counter;
- else
- io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
+ io_itr = lp_loop.counter;
io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
#if DEBUG_STORE
#endif
system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
for (i = 0; i < vector_length; ++i) {
- LLVMValueRef true_index =
+ LLVMValueRef vert_index =
LLVMBuildAdd(builder,
lp_loop.counter,
lp_build_const_int32(gallivm, i), "");
+ LLVMValueRef true_index =
+ LLVMBuildAdd(builder, start, vert_index, "");
+ LLVMValueRef vertex_id;
/* make sure we're not out of bounds which can happen
* if fetch_count % 4 != 0, because on the last iteration
* a few of the 4 vertex fetches will be out of bounds */
true_index = lp_build_min(&bld, true_index, fetch_max);
- system_values.vertex_id = LLVMBuildInsertElement(
- gallivm->builder,
- system_values.vertex_id, true_index,
- lp_build_const_int32(gallivm, i), "");
-
if (elts) {
LLVMValueRef fetch_ptr;
LLVMValueRef index_overflowed;
gallivm,
lp_build_vec_type(gallivm, lp_type_int(32)), "");
struct lp_build_if_state if_ctx;
- index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+ index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
true_index, fetch_elt_max,
"index_overflowed");
-
+
lp_build_if(&if_ctx, gallivm, index_overflowed);
{
/* Generate maximum possible index so that
lp_build_endif(&if_ctx);
true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
}
+ /* in the paths with elts vertex id has to be unaffected by the
+ * index bias and because indices inside our elements array have
+ * already had index bias applied we need to subtract it here to
+ * get back to the original index.
+ * in the linear paths vertex id has to be unaffected by the
+ * original start index and because we abuse the 'start' variable
+ * to either represent the actual start index or the index at which
+ * the primitive was split (we split rendering into chunks of at
+ * most 4095-vertices) we need to back out the original start
+ * index out of our vertex id here.
+ */
+ vertex_id = LLVMBuildSub(builder, true_index, vertex_id_offset, "");
+
+ system_values.vertex_id = LLVMBuildInsertElement(
+ gallivm->builder,
+ system_values.vertex_id, vertex_id,
+ lp_build_const_int32(gallivm, i), "");
for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
LLVMValueRef vb_index =
lp_build_const_int32(gallivm, velem->vertex_buffer_index);
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
- generate_fetch(gallivm, vbuffers_ptr,
+ generate_fetch(gallivm, draw, vbuffers_ptr,
&aos_attribs[j][i], velem, vb, true_index,
system_values.instance_id);
}
vs_info->num_outputs, vs_type,
have_clipdist);
}
-
- lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);
+ lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
sampler->destroy(sampler);
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
key->has_gs = llvm->draw->gs.geometry_shader != NULL;
+ key->num_outputs = draw_total_vs_outputs(llvm->draw);
key->pad1 = 0;
/* All variants of this shader will have the same value for
{
struct gallivm_state *gallivm = variant->gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef bits[16];
- struct lp_type mask_type = lp_int_type(gs_type);
- struct lp_type mask_elem_type = lp_elem_type(mask_type);
- LLVMValueRef mask_val = lp_build_const_vec(gallivm,
- mask_type,
- 0);
+ struct lp_type mask_type = lp_int_type(gs_type);
+ LLVMValueRef num_prims;
+ LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
unsigned i;
- assert(gs_type.length <= Elements(bits));
-
- for (i = gs_type.length; i >= 1; --i) {
- int idx = i - 1;
- LLVMValueRef ind = lp_build_const_int32(gallivm, i);
- bits[idx] = lp_build_compare(gallivm,
- mask_elem_type, PIPE_FUNC_GEQUAL,
- variant->num_prims, ind);
+ num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
+ variant->num_prims);
+ for (i = 0; i <= gs_type.length; i++) {
+ LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+ mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
}
- for (i = 0; i < gs_type.length; ++i) {
- LLVMValueRef ind = lp_build_const_int32(gallivm, i);
- mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
- }
- mask_val = lp_build_compare(gallivm,
- mask_type, PIPE_FUNC_NOTEQUAL,
- mask_val,
- lp_build_const_int_vec(gallivm, mask_type, 0));
+ mask_val = lp_build_compare(gallivm, mask_type,
+ PIPE_FUNC_GREATER, num_prims, mask_val);
return mask_val;
}
system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
}
+ if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
+ tgsi_dump(tokens, 0);
+ draw_gs_llvm_dump_variant_key(&variant->key);
+ }
+
lp_build_tgsi_soa(variant->gallivm,
tokens,
gs_type,
key = (struct draw_gs_llvm_variant_key *)store;
+ key->num_outputs = draw_total_gs_outputs(llvm->draw);
+
/* All variants of this shader will have the same value for
* nr_samplers. Not yet trying to compact away holes in the
* sampler array.