X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fdraw%2Fdraw_llvm.c;h=2b5f01cda74c4091d77e0faf39eea1883e40578f;hb=b2ddb93ff3b8c88682634ccdef247967e31fab84;hp=42653d36ec4aa5d43a28853215e6810841f926e3;hpb=01eebfe1b6de2e36dd3af0952fc8329b7073a100;p=mesa.git diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 42653d36ec4..2b5f01cda74 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,17 +31,22 @@ #include "draw_vs.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" -#include "util/u_cpu_detect.h" +#include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" @@ -69,12 +74,17 @@ init_globals(struct draw_llvm *llvm) elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); + PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); + elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatType(), 4); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -99,6 +109,18 @@ init_globals(struct draw_llvm *llvm) LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, llvm->target, texture_type, DRAW_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, + llvm->target, texture_type, + DRAW_JIT_TEXTURE_BORDER_COLOR); LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, llvm->target, texture_type); @@ -108,12 +130,14 @@ init_globals(struct draw_llvm *llvm) /* struct draw_jit_context */ { - LLVMTypeRef elem_types[3]; + LLVMTypeRef elem_types[5]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, + elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ + elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ + elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ + elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_VERTEX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -122,6 +146,8 @@ init_globals(struct draw_llvm *llvm) llvm->target, context_type, 0); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, llvm->target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, + llvm->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, llvm->target, context_type, DRAW_JIT_CTX_TEXTURES); @@ -208,13 +234,6 @@ draw_llvm_create(struct draw_context *draw) { struct draw_llvm *llvm; -#ifdef PIPE_ARCH_X86 - util_cpu_detect(); - /* require SSE2 due to LLVM PR6960. */ - if (!util_cpu_caps.has_sse2) - return NULL; -#endif - llvm = CALLOC_STRUCT( draw_llvm ); if (!llvm) return NULL; @@ -238,16 +257,23 @@ draw_llvm_create(struct draw_context *draw) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. + + if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { + /* For LLVM >= 2.7 and 32-bit build, use this order of passes to + * avoid generating bad code. + * Test with piglit glsl-vs-sqrt-zero test. */ - LLVMAddInstructionCombiningPass(llvm->pass); + LLVMAddConstantPropagationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); } + else { + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); + LLVMAddConstantPropagationPass(llvm->pass); + } + + LLVMAddInstructionCombiningPass(llvm->pass); LLVMAddGVNPass(llvm->pass); } else { /* We need at least this pass to prevent the backends to fail in @@ -277,15 +303,23 @@ draw_llvm_destroy(struct draw_llvm *llvm) } struct draw_llvm_variant * -draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) +draw_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_inputs, + const struct draw_llvm_variant_key *key) { - struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + struct draw_llvm_variant *variant; struct llvm_vertex_shader *shader = llvm_vertex_shader(llvm->draw->vs.vertex_shader); + variant = MALLOC(sizeof *variant + + shader->variant_key_size - + sizeof variant->key); + if (variant == NULL) + return NULL; + variant->llvm = llvm; - draw_llvm_make_variant_key(llvm, &variant->key); + memcpy(&variant->key, key, shader->variant_key_size); llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); @@ -307,11 +341,12 @@ generate_vs(struct draw_llvm *llvm, LLVMValueRef (*outputs)[NUM_CHANNELS], const LLVMValueRef (*inputs)[NUM_CHANNELS], LLVMValueRef context_ptr, - struct lp_build_sampler_soa *sampler) + struct lp_build_sampler_soa *draw_sampler) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + struct lp_build_sampler_soa *sampler = 0; memset(&vs_type, 0, sizeof vs_type); vs_type.floating = TRUE; /* floating point values */ @@ -327,6 +362,10 @@ generate_vs(struct draw_llvm *llvm, tgsi_dump(tokens, 0); } + if (llvm->draw->num_sampler_views && + llvm->draw->num_samplers) + sampler = draw_sampler; + lp_build_tgsi_soa(builder, tokens, vs_type, @@ -363,7 +402,8 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef *res, struct pipe_vertex_element *velem, LLVMValueRef vbuf, - LLVMValueRef index) + LLVMValueRef index, + LLVMValueRef instance_id) { LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, @@ -374,8 +414,15 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef cond; LLVMValueRef stride; - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + if (velem->instance_divisor) { + /* array index = instance_id / instance_divisor */ + index = LLVMBuildUDiv(builder, instance_id, + LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), + "instance_divisor"); + } + /* limit index to min(index, vb_max_index) */ + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); stride = LLVMBuildMul(builder, vb_stride, index, ""); @@ -503,19 +550,28 @@ static void store_aos(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value) + LLVMValueRef value, + LLVMValueRef clipmask) { LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); LLVMValueRef indices[3]; + LLVMValueRef val, shift; indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); indices[1] = index; indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); - /* undefined vertex */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), - 0xffff, 0), id_ptr); + /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ + val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); + shift = LLVMConstInt(LLVMInt32Type(), 12, 0); + val = LLVMBuildShl(builder, val, shift, ""); + /* add clipmask:12 */ + val = LLVMBuildOr(builder, val, clipmask, ""); + + /* store vertex header */ + LLVMBuildStore(builder, val, id_ptr); + #if DEBUG_STORE lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -570,7 +626,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef io_ptr, LLVMValueRef aos[NUM_CHANNELS], int attrib, - int num_outputs) + int num_outputs, + LLVMValueRef clipmask) { LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); @@ -578,7 +635,8 @@ store_aos_array(LLVMBuilderRef builder, LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - + LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + debug_assert(NUM_CHANNELS == 4); io0_ptr = LLVMBuildGEP(builder, io_ptr, @@ -590,21 +648,31 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + clipmask0 = LLVMBuildExtractElement(builder, clipmask, + ind0, ""); + clipmask1 = LLVMBuildExtractElement(builder, clipmask, + ind1, ""); + clipmask2 = LLVMBuildExtractElement(builder, clipmask, + ind2, ""); + clipmask3 = LLVMBuildExtractElement(builder, clipmask, + ind3, ""); + #if DEBUG_STORE - lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", - io_ptr, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", + io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif - - store_aos(builder, io0_ptr, attr_index, aos[0]); - store_aos(builder, io1_ptr, attr_index, aos[1]); - store_aos(builder, io2_ptr, attr_index, aos[2]); - store_aos(builder, io3_ptr, attr_index, aos[3]); + /* store for each of the 4 vertices */ + store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); + store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); + store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); + store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); } static void convert_to_aos(LLVMBuilderRef builder, LLVMValueRef io, LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef clipmask, int num_outputs, int max_vertices) { @@ -633,33 +701,330 @@ convert_to_aos(LLVMBuilderRef builder, io, aos, attrib, - num_outputs); + num_outputs, + clipmask); } #if DEBUG_STORE lp_build_printf(builder, " # storing end\n"); #endif } +/* + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + */ +static void +store_clip(LLVMBuilderRef builder, + LLVMValueRef io_ptr, + LLVMValueRef (*outputs)[NUM_CHANNELS]) +{ + LLVMValueRef out[4]; + LLVMValueRef indices[2]; + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; + LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; + LLVMValueRef out0elem, out1elem, out2elem, out3elem; + int i; + + LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); + + out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + + clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); + clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); + clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); + clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); + + for (i = 0; i<4; i++){ + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, + indices, 2, ""); //x0 + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, + indices, 2, ""); //x1 + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, + indices, 2, ""); //x2 + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, + indices, 2, ""); //x3 + + out0elem = LLVMBuildExtractElement(builder, out[i], + ind0, ""); //x0 + out1elem = LLVMBuildExtractElement(builder, out[i], + ind1, ""); //x1 + out2elem = LLVMBuildExtractElement(builder, out[i], + ind2, ""); //x2 + out3elem = LLVMBuildExtractElement(builder, out[i], + ind3, ""); //x3 + + LLVMBuildStore(builder, out0elem, clip0_ptr); + LLVMBuildStore(builder, out1elem, clip1_ptr); + LLVMBuildStore(builder, out2elem, clip2_ptr); + LLVMBuildStore(builder, out3elem, clip3_ptr); + + indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + } + +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +/* + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + int i; + struct lp_type f32_type = lp_type_float_vec(32); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); + + /* for 1/w convention*/ + out3 = LLVMBuildFDiv(builder, const1, out3, ""); + LLVMBuildStore(builder, out3, outputs[0][3]); + + /* Viewport Mapping */ + for (i=0; i<3; i++){ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef scale; + LLVMValueRef trans; + LLVMValueRef scale_i; + LLVMValueRef trans_i; + LLVMValueRef index; + + index = LLVMConstInt(LLVMInt32Type(), i, 0); + scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + index = LLVMConstInt(LLVMInt32Type(), i+4, 0); + trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); + trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); + + /* divide by w */ + out = LLVMBuildFMul(builder, out, out3, ""); + /* mult by scale */ + out = LLVMBuildFMul(builder, out, scale, ""); + /* add translation */ + out = LLVMBuildFAdd(builder, out, trans, ""); + + /* store transformed outputs */ + LLVMBuildStore(builder, out, outputs[0][i]); + } + +} + + +/* + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef +generate_clipmask(LLVMBuilderRef builder, + LLVMValueRef (*outputs)[NUM_CHANNELS], + boolean clip_xy, + boolean clip_z, + boolean clip_user, + boolean clip_halfz, + unsigned nr, + LLVMValueRef context_ptr) +{ + LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef test, temp; + LLVMValueRef zero, shift; + LLVMValueRef pos_x, pos_y, pos_z, pos_w; + LLVMValueRef plane1, planes, plane_ptr, sum; + + unsigned i; + + struct lp_type f32_type = lp_type_float_vec(32); + + mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); + zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ + + /* Assuming position stored at output[0] */ + pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ + pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ + pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ + pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + + /* Cliptest, for hardwired planes */ + if (clip_xy){ + /* plane 1 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + temp = shift; + test = LLVMBuildAnd(builder, test, temp, ""); + mask = test; + + /* plane 2 */ + test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 3 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 4 */ + test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_z){ + temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); + if (clip_halfz){ + /* plane 5 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + else{ + /* plane 5 */ + test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + /* plane 6 */ + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_user){ + LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); + LLVMValueRef indices[3]; + temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); + + /* userclip planes */ + for (i = 6; i < nr; i++) { + indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); + planes = vec4f_from_scalar(builder, plane1, "plane4_x"); + sum = LLVMBuildFMul(builder, planes, pos_x, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); + planes = vec4f_from_scalar(builder, plane1, "plane4_y"); + test = LLVMBuildFMul(builder, planes, pos_y, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); + planes = vec4f_from_scalar(builder, plane1, "plane4_z"); + test = LLVMBuildFMul(builder, planes, pos_z, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); + planes = vec4f_from_scalar(builder, plane1, "plane4_w"); + test = LLVMBuildFMul(builder, planes, pos_w, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + } + return mask; +} + +/* + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean + */ +static void +clipmask_bool(LLVMBuilderRef builder, + LLVMValueRef clipmask, + LLVMValueRef ret_ptr) +{ + LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMValueRef temp; + int i; + + for (i=0; i<4; i++){ + temp = LLVMBuildExtractElement(builder, clipmask, + LLVMConstInt(LLVMInt32Type(), i, 0) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); + } + + LLVMBuildStore(builder, ret, ret_ptr); +} + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef start, end, count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; void *code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -667,8 +1032,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) arg_types[4] = LLVMInt32Type(); /* count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); @@ -683,6 +1049,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) count = LLVMGetParam(variant->function, 4); stride = LLVMGetParam(variant->function, 5); vb_ptr = LLVMGetParam(variant->function, 6); + instance_id = LLVMGetParam(variant->function, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -691,6 +1058,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_build_name(count, "count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -700,15 +1068,20 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld, builder, lp_type_int(32)); end = lp_build_add(&bld, start, count); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* function will return non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + /* code generated texture sampling */ - sampler = draw_llvm_sampler_soa_create(variant->key.sampler, - context_ptr); + sampler = draw_llvm_sampler_soa_create( + draw_llvm_variant_key_samplers(&variant->key), + context_ptr); #if DEBUG_STORE lp_build_printf(builder, "start = %d, end = %d, step = %d\n", @@ -719,6 +1092,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); @@ -740,7 +1114,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -754,16 +1129,44 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header and positions in data */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -789,31 +1192,36 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function); } static void draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { - LLVMTypeRef arg_types[7]; + LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + LLVMValueRef instance_id; struct draw_context *draw = llvm->draw; unsigned i, j; struct lp_build_context bld; - struct lp_build_context bld_int; struct lp_build_loop_state lp_loop; - struct lp_type vs_type = lp_type_float_vec(32); const int max_vertices = 4; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef fetch_max; void *code; struct lp_build_sampler_soa *sampler = 0; - + LLVMValueRef ret, ret_ptr; + boolean bypass_viewport = variant->key.bypass_viewport; + boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + arg_types[0] = llvm->context_ptr_type; /* context */ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ @@ -821,14 +1229,16 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian arg_types[4] = LLVMInt32Type(); /* fetch_count */ arg_types[5] = LLVMInt32Type(); /* stride */ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + arg_types[7] = LLVMInt32Type(); /* instance_id */ - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); for(i = 0; i < Elements(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), + LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function_elts, 0); io_ptr = LLVMGetParam(variant->function_elts, 1); @@ -837,6 +1247,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian fetch_count = LLVMGetParam(variant->function_elts, 4); stride = LLVMGetParam(variant->function_elts, 5); vb_ptr = LLVMGetParam(variant->function_elts, 6); + instance_id = LLVMGetParam(variant->function_elts, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); @@ -845,6 +1256,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian lp_build_name(fetch_count, "fetch_count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); + lp_build_name(instance_id, "instance_id"); /* * Function body @@ -854,24 +1266,29 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, vs_type); - lp_build_context_init(&bld_int, builder, lp_type_int(32)); + lp_build_context_init(&bld, builder, lp_type_int(32)); step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); /* code generated texture sampling */ - sampler = draw_llvm_sampler_soa_create(variant->key.sampler, - context_ptr); + sampler = draw_llvm_sampler_soa_create( + draw_llvm_variant_key_samplers(&variant->key), + context_ptr); fetch_max = LLVMBuildSub(builder, fetch_count, LLVMConstInt(LLVMInt32Type(), 1, 0), "fetch_max"); + /* function returns non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); + LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; + LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -890,7 +1307,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ - true_index = lp_build_min(&bld_int, true_index, fetch_max); + true_index = lp_build_min(&bld, true_index, fetch_max); fetch_ptr = LLVMBuildGEP(builder, fetch_elts, &true_index, 1, ""); @@ -903,7 +1320,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index); + &aos_attribs[j][i], velem, vb, true_index, + instance_id); } } convert_to_soa(builder, aos_attribs, inputs, @@ -917,16 +1335,47 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian context_ptr, sampler); - convert_to_aos(builder, io, outputs, + /* store original positions in clip before further manipulation */ + store_clip(builder, io, outputs); + + /* do cliptest */ + if (enable_cliptest){ + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(builder, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.nr_planes, + context_ptr); + /* return clipping boolean value for function */ + clipmask_bool(builder, clipmask, ret_ptr); + } + else{ + clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport){ + generate_viewport(llvm, builder, outputs, context_ptr); + } + + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(builder, io, outputs, clipmask, draw->vs.vertex_shader->info.num_outputs, max_vertices); } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); sampler->destroy(sampler); - LLVMBuildRetVoid(builder); - + ret = LLVMBuildLoad(builder, ret_ptr,""); + LLVMBuildRet(builder, ret); + LLVMDisposeBuilder(builder); /* @@ -952,33 +1401,55 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } + lp_func_delete_body(variant->function_elts); } -void -draw_llvm_make_variant_key(struct draw_llvm *llvm, - struct draw_llvm_variant_key *key) + +struct draw_llvm_variant_key * +draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) { unsigned i; + struct draw_llvm_variant_key *key; + struct lp_sampler_static_state *sampler; - memset(key, 0, sizeof(struct draw_llvm_variant_key)); + key = (struct draw_llvm_variant_key *)store; + /* Presumably all variants of the shader should have the same + * number of vertex elements - ie the number of shader inputs. + */ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + /* will have to rig this up properly later */ + key->clip_xy = llvm->draw->clip_xy; + key->clip_z = llvm->draw->clip_z; + key->clip_user = llvm->draw->clip_user; + key->bypass_viewport = llvm->draw->identity_viewport; + key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; + key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); + key->nr_planes = llvm->draw->nr_planes; + key->pad = 0; + + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. + */ + key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + + sampler = draw_llvm_variant_key_samplers(key); + memcpy(key->vertex_element, llvm->draw->pt.vertex_element, sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); + + memset(sampler, 0, key->nr_samplers * sizeof *sampler); - memcpy(&key->vs, - &llvm->draw->vs.vertex_shader->state, - sizeof(struct pipe_shader_state)); - - for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) { - struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader; - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) - lp_sampler_static_state(&key->sampler[i], - llvm->draw->sampler_views[i], - llvm->draw->samplers[i]); + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_state(&sampler[i], + llvm->draw->sampler_views[i], + llvm->draw->samplers[i]); } + + return key; } void @@ -986,14 +1457,14 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; - assert(sampler_idx <= PIPE_MAX_VERTEX_SAMPLERS); + assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; @@ -1010,6 +1481,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, } } + +void +draw_llvm_set_sampler_state(struct draw_context *draw) +{ + unsigned i; + + for (i = 0; i < draw->num_samplers; i++) { + struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i]; + + if (draw->samplers[i]) { + jit_tex->min_lod = draw->samplers[i]->min_lod; + jit_tex->max_lod = draw->samplers[i]->max_lod; + jit_tex->lod_bias = draw->samplers[i]->lod_bias; + COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); + } + } +} + + void draw_llvm_destroy_variant(struct draw_llvm_variant *variant) {