+/*
+ * Save the original clip-coordinate position of the four vertices.
+ *
+ * Each outputs[0][c] is loaded as one vector holding channel c (x, y, z
+ * or w) of all four vertices.  Every lane is then extracted on its own
+ * and stored as a scalar at element c of the location that
+ * draw_jit_header_clip() yields for the matching vertex, where vertex v
+ * is addressed as io_ptr + v.
+ *
+ * Storing four floats at once would probably be cheaper than this
+ * element-by-element copy.
+ */
+static void
+store_clip(LLVMBuilderRef builder,
+           LLVMValueRef io_ptr,
+           LLVMValueRef (*outputs)[NUM_CHANNELS])
+{
+   LLVMTypeRef i32 = LLVMInt32Type();
+   LLVMValueRef lane[4];        /* i32 constants 0, 1, 2, 3 */
+   LLVMValueRef channel[4];     /* x, y, z, w vectors */
+   LLVMValueRef vertex[4];      /* io_ptr + v */
+   LLVMValueRef clip_base[4];   /* draw_jit_header_clip() of each vertex */
+   LLVMValueRef gep_idx[2];
+   unsigned chan, v;
+
+   for (v = 0; v < 4; v++) {
+      lane[v] = LLVMConstInt(i32, v, 0);
+   }
+
+   /* {0, channel}: the second index is advanced once per channel below */
+   gep_idx[0] = LLVMConstInt(i32, 0, 0);
+   gep_idx[1] = LLVMConstInt(i32, 0, 0);
+
+   for (chan = 0; chan < 4; chan++) {
+      channel[chan] = LLVMBuildLoad(builder, outputs[0][chan], "");
+   }
+
+   for (v = 0; v < 4; v++) {
+      vertex[v] = LLVMBuildGEP(builder, io_ptr, &lane[v], 1, "");
+   }
+
+   for (v = 0; v < 4; v++) {
+      clip_base[v] = draw_jit_header_clip(builder, vertex[v]);
+   }
+
+   for (chan = 0; chan < 4; chan++) {
+      LLVMValueRef dst[4];
+      LLVMValueRef src[4];
+
+      /* destination: element `chan` of every vertex's clip storage */
+      for (v = 0; v < 4; v++) {
+         dst[v] = LLVMBuildGEP(builder, clip_base[v], gep_idx, 2, "");
+      }
+
+      /* source: lane v of the current channel vector */
+      for (v = 0; v < 4; v++) {
+         src[v] = LLVMBuildExtractElement(builder, channel[chan], lane[v], "");
+      }
+
+      for (v = 0; v < 4; v++) {
+         LLVMBuildStore(builder, src[v], dst[v]);
+      }
+
+      /* move on to the next channel slot */
+      gep_idx[1] = LLVMBuildAdd(builder, gep_idx[1], lane[1], "");
+   }
+}
+
+/*
+ * Replicate the scalar `a` into every lane of a <4 x float> vector,
+ * the equivalent of _mm_set1_ps(a).
+ *
+ * Four insertelement instructions are built on top of an undef vector;
+ * only the last one, which produces the returned value, is given `name`.
+ */
+static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
+                                      LLVMValueRef a,
+                                      const char *name)
+{
+   LLVMTypeRef vec_type = LLVMVectorType(LLVMFloatType(), 4);
+   LLVMValueRef splat = LLVMGetUndef(vec_type);
+   unsigned lane = 0;
+
+   while (lane < 4) {
+      const char *label = (lane < 3) ? "" : name;
+      LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), lane, 0);
+
+      splat = LLVMBuildInsertElement(bld, splat, a, pos, label);
+      lane++;
+   }
+
+   return splat;
+}
+
+/*
+ * Apply the perspective divide and viewport mapping to outputs[0],
+ * in place.
+ *
+ * The w channel is replaced by 1/w.  Each of x, y and z is multiplied
+ * by 1/w, then by a scale factor, and finally has a translation added.
+ * For channel i the scale is read at offset i and the translation at
+ * offset i + 4 from the pointer returned by draw_jit_context_viewport().
+ *
+ * The `llvm` argument is not used by this function.
+ */
+static void
+generate_viewport(struct draw_llvm *llvm,
+                  LLVMBuilderRef builder,
+                  LLVMValueRef (*outputs)[NUM_CHANNELS],
+                  LLVMValueRef context_ptr)
+{
+   struct lp_type vec_type = lp_type_float_vec(32);
+   LLVMValueRef w = LLVMBuildLoad(builder, outputs[0][3], "");    /* w0 w1 w2 w3 */
+   LLVMValueRef ones = lp_build_const_vec(vec_type, 1.0);         /* 1.0 in every lane */
+   LLVMValueRef viewport = draw_jit_context_viewport(builder, context_ptr);
+   LLVMValueRef rcp_w;
+   unsigned chan;
+
+   /* position.w holds 1/w from here on */
+   rcp_w = LLVMBuildFDiv(builder, ones, w, "");
+   LLVMBuildStore(builder, rcp_w, outputs[0][3]);
+
+   for (chan = 0; chan < 3; chan++) {
+      LLVMValueRef scale_idx = LLVMConstInt(LLVMInt32Type(), chan, 0);
+      LLVMValueRef trans_idx = LLVMConstInt(LLVMInt32Type(), chan + 4, 0);
+      LLVMValueRef value, scale_ptr, trans_ptr;
+      LLVMValueRef scale_val, trans_val;
+      LLVMValueRef scale, trans;
+
+      value = LLVMBuildLoad(builder, outputs[0][chan], "");
+
+      scale_ptr = LLVMBuildGEP(builder, viewport, &scale_idx, 1, "");
+      trans_ptr = LLVMBuildGEP(builder, viewport, &trans_idx, 1, "");
+
+      /* broadcast the scalar factors to all four vertices */
+      scale_val = LLVMBuildLoad(builder, scale_ptr, "");
+      scale = vec4f_from_scalar(builder, scale_val, "scale");
+      trans_val = LLVMBuildLoad(builder, trans_ptr, "");
+      trans = vec4f_from_scalar(builder, trans_val, "trans");
+
+      /* value = value * (1/w) * scale + trans */
+      value = LLVMBuildFMul(builder, value, rcp_w, "");
+      value = LLVMBuildFMul(builder, value, scale, "");
+      value = LLVMBuildFAdd(builder, value, trans, "");
+
+      LLVMBuildStore(builder, value, outputs[0][chan]);
+   }
+}
+
+
+/*
+ * Build the clip mask of the four vertices and return it as a <4 x i32>
+ * value, one mask per lane.
+ *
+ * The position is assumed to live in outputs[0].  Each test result from
+ * lp_build_compare() is ANDed with the bit of its plane and ORed into
+ * the mask:
+ *
+ *   0x01   x > w                                   (clip_xy)
+ *   0x02   0 > x + w                               (clip_xy)
+ *   0x04   y > w                                   (clip_xy)
+ *   0x08   0 > y + w                               (clip_xy)
+ *   0x10   0 > z if clip_halfz, else 0 > z + w     (clip_z)
+ *   0x20   z > w                                   (clip_z)
+ *   1 << i 0 > dot(plane i, position), 6 <= i < nr (clip_user)
+ *
+ * User plane coefficients are read through draw_jit_context_planes().
+ * With no clip flag set the result is an all-zero constant vector.
+ */
+static LLVMValueRef
+generate_clipmask(LLVMBuilderRef builder,
+                  LLVMValueRef (*outputs)[NUM_CHANNELS],
+                  boolean clip_xy,
+                  boolean clip_z,
+                  boolean clip_user,
+                  boolean clip_halfz,
+                  unsigned nr,
+                  LLVMValueRef context_ptr)
+{
+   struct lp_type flt_type = lp_type_float_vec(32);
+   struct lp_type int_type = lp_type_int_vec(32);
+   LLVMValueRef mask = lp_build_const_int_vec(int_type, 0);   /* accumulated result */
+   LLVMValueRef zero = lp_build_const_vec(flt_type, 0);       /* 0.0f in every lane */
+   LLVMValueRef one = lp_build_const_int_vec(int_type, 1);    /* shift count / bit 0 */
+   LLVMValueRef pos[4];                                       /* x, y, z, w vectors */
+   LLVMValueRef bit, cmp;
+   unsigned c, i;
+
+   for (c = 0; c < 4; c++) {
+      pos[c] = LLVMBuildLoad(builder, outputs[0][c], "");
+   }
+
+   /* Hardwired x/y planes: two tests per axis, bits 0..3 */
+   if (clip_xy) {
+      LLVMValueRef sum;
+
+      for (c = 0; c < 2; c++) {
+         /* coord > w */
+         cmp = lp_build_compare(builder, flt_type, PIPE_FUNC_GREATER, pos[c], pos[3]);
+         if (c == 0) {
+            /* very first plane: it starts the mask, nothing to OR with yet */
+            bit = one;
+            mask = LLVMBuildAnd(builder, cmp, bit, "");
+         }
+         else {
+            bit = LLVMBuildShl(builder, bit, one, "");
+            cmp = LLVMBuildAnd(builder, cmp, bit, "");
+            mask = LLVMBuildOr(builder, mask, cmp, "");
+         }
+
+         /* 0 > coord + w */
+         sum = LLVMBuildFAdd(builder, pos[c], pos[3], "");
+         cmp = lp_build_compare(builder, flt_type, PIPE_FUNC_GREATER, zero, sum);
+         bit = LLVMBuildShl(builder, bit, one, "");
+         cmp = LLVMBuildAnd(builder, cmp, bit, "");
+         mask = LLVMBuildOr(builder, mask, cmp, "");
+      }
+   }
+
+   /* Hardwired z planes: bits 4 and 5 */
+   if (clip_z) {
+      LLVMValueRef near_val;
+
+      bit = lp_build_const_int_vec(int_type, 16);
+
+      /* plane 5: compare z alone (half-z) or z + w against zero */
+      if (clip_halfz) {
+         near_val = pos[2];
+      }
+      else {
+         near_val = LLVMBuildFAdd(builder, pos[2], pos[3], "");
+      }
+      cmp = lp_build_compare(builder, flt_type, PIPE_FUNC_GREATER, zero, near_val);
+      cmp = LLVMBuildAnd(builder, cmp, bit, "");
+      mask = LLVMBuildOr(builder, mask, cmp, "");
+
+      /* plane 6: z > w */
+      cmp = lp_build_compare(builder, flt_type, PIPE_FUNC_GREATER, pos[2], pos[3]);
+      bit = LLVMBuildShl(builder, bit, one, "");
+      cmp = LLVMBuildAnd(builder, cmp, bit, "");
+      mask = LLVMBuildOr(builder, mask, cmp, "");
+   }
+
+   /* User clip planes: bit i for plane index i, starting at 6 */
+   if (clip_user) {
+      static const char *const scalar_name[4] = {
+         "plane_x", "plane_y", "plane_z", "plane_w"
+      };
+      static const char *const vector_name[4] = {
+         "plane4_x", "plane4_y", "plane4_z", "plane4_w"
+      };
+      LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr);
+      LLVMValueRef idx[3];
+
+      /* shifted left before each use, so plane 6 ends up with 64 */
+      bit = lp_build_const_int_vec(int_type, 32);
+
+      for (i = 6; i < nr; i++) {
+         LLVMValueRef dot = zero;   /* placeholder, replaced by the first product */
+
+         idx[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+         idx[1] = LLVMConstInt(LLVMInt32Type(), i, 0);
+
+         /* dot = plane.x * x + plane.y * y + plane.z * z + plane.w * w */
+         for (c = 0; c < 4; c++) {
+            LLVMValueRef coeff_ptr, coeff, coeff4, term;
+
+            idx[2] = LLVMConstInt(LLVMInt32Type(), c, 0);
+            coeff_ptr = LLVMBuildGEP(builder, planes_ptr, idx, 3, "");
+            coeff = LLVMBuildLoad(builder, coeff_ptr, scalar_name[c]);
+            coeff4 = vec4f_from_scalar(builder, coeff, vector_name[c]);
+            term = LLVMBuildFMul(builder, coeff4, pos[c], "");
+
+            if (c == 0) {
+               dot = term;
+            }
+            else {
+               dot = LLVMBuildFAdd(builder, dot, term, "");
+            }
+         }
+
+         cmp = lp_build_compare(builder, flt_type, PIPE_FUNC_GREATER, zero, dot);
+         bit = LLVMBuildShl(builder, bit, one, "");
+         cmp = LLVMBuildAnd(builder, cmp, bit, "");
+         mask = LLVMBuildOr(builder, mask, cmp, "");
+      }
+   }
+
+   return mask;
+}
+
+/*
+ * Fold the clip masks of all four vertices into the value at ret_ptr.
+ *
+ * The current value is loaded from ret_ptr, ORed with each of the four
+ * lanes of `clipmask` and stored back.  Nothing is returned; the stored
+ * integer acts as a boolean, where zero means no clipping recorded and
+ * non-zero means at least one clip bit was set.
+ */
+static void
+clipmask_bool(LLVMBuilderRef builder,
+              LLVMValueRef clipmask,
+              LLVMValueRef ret_ptr)
+{
+   LLVMValueRef accum = LLVMBuildLoad(builder, ret_ptr, "");
+   unsigned lane;
+
+   for (lane = 0; lane < 4; lane++) {
+      LLVMValueRef lane_idx = LLVMConstInt(LLVMInt32Type(), lane, 0);
+      LLVMValueRef lane_mask = LLVMBuildExtractElement(builder, clipmask,
+                                                       lane_idx, "");
+
+      accum = LLVMBuildOr(builder, accum, lane_mask, "");
+   }
+
+   LLVMBuildStore(builder, accum, ret_ptr);
+}
+