#include "draw_vs.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_struct.h"
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_flow.h"
store_aos(LLVMBuilderRef builder,
LLVMValueRef io_ptr,
LLVMValueRef index,
- LLVMValueRef value)
+ LLVMValueRef value,
+ LLVMValueRef clipmask)
{
LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
LLVMValueRef indices[3];
+ LLVMValueRef val, shift;
indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indices[1] = index;
indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- /* undefined vertex */
- LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(),
- 0xffff, 0), id_ptr);
+ /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
+ val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
+ shift = LLVMConstInt(LLVMInt32Type(), 12, 0);
+ val = LLVMBuildShl(builder, val, shift, "");
+ /* add clipmask:12 */
+ val = LLVMBuildOr(builder, val, clipmask, "");
+
+ /* store vertex header */
+ LLVMBuildStore(builder, val, id_ptr);
+
#if DEBUG_STORE
lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
LLVMValueRef io_ptr,
LLVMValueRef aos[NUM_CHANNELS],
int attrib,
- int num_outputs)
+ int num_outputs,
+ LLVMValueRef clipmask)
{
LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
-
+ LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
+
debug_assert(NUM_CHANNELS == 4);
io0_ptr = LLVMBuildGEP(builder, io_ptr,
io3_ptr = LLVMBuildGEP(builder, io_ptr,
&ind3, 1, "");
+ clipmask0 = LLVMBuildExtractElement(builder, clipmask,
+ ind0, "");
+ clipmask1 = LLVMBuildExtractElement(builder, clipmask,
+ ind1, "");
+ clipmask2 = LLVMBuildExtractElement(builder, clipmask,
+ ind2, "");
+ clipmask3 = LLVMBuildExtractElement(builder, clipmask,
+ ind3, "");
+
#if DEBUG_STORE
- lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n",
- io_ptr, ind0, ind1, ind2, ind3);
+ lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
+ io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
#endif
-
- store_aos(builder, io0_ptr, attr_index, aos[0]);
- store_aos(builder, io1_ptr, attr_index, aos[1]);
- store_aos(builder, io2_ptr, attr_index, aos[2]);
- store_aos(builder, io3_ptr, attr_index, aos[3]);
+ /* store for each of the 4 vertices */
+ store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0);
+ store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1);
+ store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2);
+ store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);
}
static void
convert_to_aos(LLVMBuilderRef builder,
LLVMValueRef io,
LLVMValueRef (*outputs)[NUM_CHANNELS],
+ LLVMValueRef clipmask,
int num_outputs,
int max_vertices)
{
io,
aos,
attrib,
- num_outputs);
+ num_outputs,
+ clipmask);
}
#if DEBUG_STORE
lp_build_printf(builder, " # storing end\n");
#endif
}
+/**
+ * Store the untransformed clip-space position of four vertices into the
+ * clip field of their vertex headers.
+ *
+ * \param builder  LLVM IR builder used to emit the loads and stores
+ * \param io_ptr   pointer to the first of four consecutive vertex headers
+ * \param outputs  shader outputs; outputs[0][c] is loaded as position
+ *                 channel c (x, y, z, w)
+ *
+ * Each loaded position vector holds ONE channel for all four vertices
+ * (x0 x1 x2 x3, y0 y1 y2 y3, ...), so lane v of channel c has to land in
+ * clip[c] of vertex v.  The previous code wrote lane i of channel k into
+ * clip[i] of vertex k, which transposed the data.
+ *
+ * There is probably a more efficient way to do this, 4 floats at once
+ * rather than extracting each element one by one.
+ */
+static void
+store_clip(LLVMBuilderRef builder,
+           LLVMValueRef io_ptr,
+           LLVMValueRef (*outputs)[NUM_CHANNELS])
+{
+   LLVMValueRef lane[4];      /* i32 constants 0..3 */
+   LLVMValueRef clip_ptr[4];  /* clip field of each of the four vertices */
+   LLVMValueRef indices[2];
+   int chan, vert;
+
+   for (vert = 0; vert < 4; vert++) {
+      lane[vert] = LLVMConstInt(LLVMInt32Type(), vert, 0);
+   }
+
+   for (vert = 0; vert < 4; vert++) {
+      LLVMValueRef vertex_ptr = LLVMBuildGEP(builder, io_ptr,
+                                             &lane[vert], 1, "");
+      clip_ptr[vert] = draw_jit_header_clip(builder, vertex_ptr);
+   }
+
+   /* Two-index GEP {0, chan}: step through the clip pointer, then select
+    * element chan (assumes the clip field is an array of floats, as the
+    * original indexing did).
+    */
+   indices[0] = lane[0];
+
+   for (chan = 0; chan < 4; chan++) {
+      /* channel `chan` of all four vertices */
+      LLVMValueRef soa = LLVMBuildLoad(builder, outputs[0][chan], "");
+
+      indices[1] = lane[chan];
+
+      for (vert = 0; vert < 4; vert++) {
+         LLVMValueRef dst = LLVMBuildGEP(builder, clip_ptr[vert],
+                                         indices, 2, "");
+         LLVMValueRef elem = LLVMBuildExtractElement(builder, soa,
+                                                     lane[vert], "");
+         LLVMBuildStore(builder, elem, dst);
+      }
+   }
+}
+
+
+/**
+ * Apply the perspective divide and viewport transform to the position
+ * output (outputs[0]) of four vertices, in place.
+ *
+ * \param llvm     supplies llvm->draw->viewport.scale / .translate
+ * \param builder  LLVM IR builder used to emit the arithmetic
+ * \param outputs  shader outputs; outputs[0][c] is position channel c
+ *
+ * After this runs x, y and z hold  (v / w) * scale + translate  and the
+ * w slot holds 1/w (the "1/w convention").  The previous code also pushed
+ * w through the x/y/z formula, leaving w * (1/w) * scale[3] + translate[3]
+ * there and dropping the reciprocal.
+ *
+ * NOTE(review): scale and translate are read while the code is being
+ * generated and emitted as constants, so the generated function keeps
+ * the viewport values seen at that moment.
+ */
+static void
+generate_viewport(struct draw_llvm *llvm,
+                  LLVMBuilderRef builder,
+                  LLVMValueRef (*outputs)[NUM_CHANNELS])
+{
+   int i;
+   const float *scaleA = llvm->draw->viewport.scale;
+   const float *transA = llvm->draw->viewport.translate;
+   struct lp_type f32_type = lp_type_float_vec(32);
+   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
+   LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
+
+   /* for 1/w convention: keep the reciprocal in the w channel */
+   out3 = LLVMBuildFDiv(builder, const1, out3, "");
+   LLVMBuildStore(builder, out3, outputs[0][3]);
+
+   /* Viewport mapping of x, y and z.  The operands are float vectors, so
+    * the floating-point builders (FMul/FAdd) must be used.
+    */
+   for (i = 0; i < 3; i++) {
+      LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
+      LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/
+      LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/
+
+      /* divide by w */
+      out = LLVMBuildFMul(builder, out, out3, "");
+      /* mult by scale */
+      out = LLVMBuildFMul(builder, out, scale, "");
+      /* add translation */
+      out = LLVMBuildFAdd(builder, out, trans, "");
+
+      /* store transformed outputs */
+      LLVMBuildStore(builder, out, outputs[0][i]);
+   }
+}
+
+
+/*
+ * Build the clip mask of four vertices against the six hardwired planes.
+ *
+ * Position is assumed to be stored at outputs[0]; each channel is loaded
+ * as a vector holding that channel for all four vertices.  Every plane
+ * test is ANDed with that plane's bit and ORed into the result:
+ *    bit 0: x > w          bit 1: 0 > x + w
+ *    bit 2: y > w          bit 3: 0 > y + w
+ *    bit 4: 0 > z + w      bit 5: z > w
+ *
+ * Returns the clipmask as a 4xi32 value, one bitmask per vertex.
+ */
+static LLVMValueRef
+generate_clipmask(LLVMBuilderRef builder,
+                  LLVMValueRef (*outputs)[NUM_CHANNELS])
+{
+   /* One entry per plane, in bit order: which position channel is tested
+    * and whether the test is the "0 > chan + w" form (lower != 0) or the
+    * "chan > w" form (lower == 0).
+    */
+   static const struct {
+      int chan;
+      int lower;
+   } planes[6] = {
+      { 0, 0 }, { 0, 1 },
+      { 1, 0 }, { 1, 1 },
+      { 2, 1 }, { 2, 0 }
+   };
+   struct lp_type vec_f32 = lp_type_float_vec(32);
+   LLVMValueRef zeros = lp_build_const_vec(vec_f32, 0);                   /* 0.0f x4 */
+   LLVMValueRef ones = lp_build_const_int_vec(lp_type_int_vec(32), 1);    /* 1 x4 */
+   LLVMValueRef pos[4];           /* x, y, z, w channel vectors */
+   LLVMValueRef plane_bit = ones; /* bit of the plane being tested */
+   LLVMValueRef clipmask = 0;
+   unsigned c, p;
+
+   for (c = 0; c < 4; c++) {
+      pos[c] = LLVMBuildLoad(builder, outputs[0][c], "");
+   }
+
+   for (p = 0; p < 6; p++) {
+      LLVMValueRef coord = pos[planes[p].chan];
+      LLVMValueRef outside;
+
+      if (planes[p].lower) {
+         LLVMValueRef sum = LLVMBuildFAdd(builder, coord, pos[3], "");
+         outside = lp_build_compare(builder, vec_f32, PIPE_FUNC_GREATER, zeros, sum);
+      } else {
+         outside = lp_build_compare(builder, vec_f32, PIPE_FUNC_GREATER, coord, pos[3]);
+      }
+
+      if (p == 0) {
+         /* first plane seeds the mask with bit 0 */
+         clipmask = LLVMBuildAnd(builder, outside, plane_bit, "");
+      } else {
+         /* advance to the next bit, then merge this plane's result */
+         plane_bit = LLVMBuildShl(builder, plane_bit, ones, "");
+         outside = LLVMBuildAnd(builder, outside, plane_bit, "");
+         clipmask = LLVMBuildOr(builder, clipmask, outside, "");
+      }
+   }
+
+   return clipmask;
+}
+
+
+/**
+ * Fold the clip masks of four vertices into the "anything clipped" flag.
+ *
+ * \param builder   LLVM IR builder used to emit the instructions
+ * \param clipmask  4xi32 value holding one clip bitmask per vertex
+ * \param ret_ptr   pointer to the i32 accumulator; it is loaded, ORed with
+ *                  all four lanes of clipmask and stored back
+ *
+ * Nothing is returned: the result is the value left behind ret_ptr, where
+ * zero means no clip bit was set and non-zero means at least one was.
+ */
+static void
+clipmask_bool(LLVMBuilderRef builder,
+              LLVMValueRef clipmask,
+              LLVMValueRef ret_ptr)
+{
+   LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
+   LLVMValueRef temp;
+   int i;
+
+   /* OR every lane into the running flag (debug value dumps removed) */
+   for (i = 0; i < 4; i++) {
+      temp = LLVMBuildExtractElement(builder, clipmask,
+                                     LLVMConstInt(LLVMInt32Type(), i, 0), "");
+      ret = LLVMBuildOr(builder, ret, temp, "");
+   }
+
+   LLVMBuildStore(builder, ret, ret_ptr);
+}
+
+
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
void *code;
struct lp_build_sampler_soa *sampler = 0;
+ LLVMValueRef ret, ret_ptr;
+
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
arg_types[7] = LLVMInt32Type(); /* instance_id */
- func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+ func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* function will return non-zero i32 value if any clipped vertices */
+ ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
+ LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
+
/* code generated texture sampling */
sampler = draw_llvm_sampler_soa_create(
draw_llvm_variant_key_samplers(&variant->key),
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
LLVMValueRef io;
+ LLVMValueRef clipmask; /* holds the clipmask value */
const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
context_ptr,
sampler);
- convert_to_aos(builder, io, outputs,
+ /* store original positions in clip before further manipulation */
+ store_clip(builder, io, outputs);
+
+ /* compute the per-vertex clipmask and OR it into the return flag */
+ clipmask = generate_clipmask(builder, outputs);
+ clipmask_bool(builder, clipmask, ret_ptr);
+
+ /* do viewport mapping */
+ generate_viewport(llvm, builder, outputs);
+
+ /* store clipmask in vertex header and positions in data */
+ convert_to_aos(builder, io, outputs, clipmask,
draw->vs.vertex_shader->info.num_outputs,
max_vertices);
}
+
lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
sampler->destroy(sampler);
lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
#endif
- LLVMBuildRetVoid(builder);
-
+ ret = LLVMBuildLoad(builder, ret_ptr,"");
+ LLVMBuildRet(builder, ret);
+
LLVMDisposeBuilder(builder);
/*
LLVMValueRef fetch_max;
void *code;
struct lp_build_sampler_soa *sampler = 0;
+ LLVMValueRef ret, ret_ptr;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
arg_types[7] = LLVMInt32Type(); /* instance_id */
- func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+ func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
- variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
- func_type);
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
for(i = 0; i < Elements(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
LLVMConstInt(LLVMInt32Type(), 1, 0),
"fetch_max");
+ /* function returns non-zero i32 value if any clipped vertices */
+ ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
+ LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
+
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
{
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
LLVMValueRef io;
+ LLVMValueRef clipmask; /* holds the clipmask value */
const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
io_itr = lp_loop.counter;
context_ptr,
sampler);
- convert_to_aos(builder, io, outputs,
+ /* store original positions in clip before further manipulation */
+ store_clip(builder, io, outputs);
+
+ /* compute the per-vertex clipmask and OR it into the return flag */
+ clipmask = generate_clipmask(builder, outputs);
+ clipmask_bool(builder, clipmask, ret_ptr);
+
+ /* do viewport mapping */
+ generate_viewport(llvm, builder, outputs);
+
+ /* store clipmask in vertex header,
+ * original positions in clip
+ * and transformed positions in data
+ */
+ convert_to_aos(builder, io, outputs, clipmask,
draw->vs.vertex_shader->info.num_outputs,
max_vertices);
}
+
lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
sampler->destroy(sampler);
lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
#endif
- LLVMBuildRetVoid(builder);
-
+ ret = LLVMBuildLoad(builder, ret_ptr,"");
+ LLVMBuildRet(builder, ret);
+
LLVMDisposeBuilder(builder);
/*