draw/llvmpipe: replace DRAW_MAX_TEXTURE_LEVELS with PIPE_MAX_TEXTURE_LEVELS
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
index 42653d36ec4aa5d43a28853215e6810841f926e3..d94340367c42a73009866c8e74495540dd5d23ab 100644 (file)
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_init.h"
 
 #include "tgsi/tgsi_exec.h"
 #include "tgsi/tgsi_dump.h"
 
 #include "util/u_cpu_detect.h"
+#include "util/u_math.h"
 #include "util/u_pointer.h"
 #include "util/u_string.h"
 
@@ -69,12 +72,17 @@ init_globals(struct draw_llvm *llvm)
       elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
       elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
       elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
-         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
       elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
-         LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+         LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
       elem_types[DRAW_JIT_TEXTURE_DATA] =
          LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
-                       DRAW_MAX_TEXTURE_LEVELS);
+                       PIPE_MAX_TEXTURE_LEVELS);
+      elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
+      elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
+      elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
+      elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 
+         LLVMArrayType(LLVMFloatType(), 4);
 
       texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
@@ -99,6 +107,18 @@ init_globals(struct draw_llvm *llvm)
       LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
                              llvm->target, texture_type,
                              DRAW_JIT_TEXTURE_DATA);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_MIN_LOD);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_MAX_LOD);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_LOD_BIAS);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_BORDER_COLOR);
       LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
                            llvm->target, texture_type);
 
@@ -208,13 +228,6 @@ draw_llvm_create(struct draw_context *draw)
 {
    struct draw_llvm *llvm;
 
-#ifdef PIPE_ARCH_X86
-   util_cpu_detect();
-   /* require SSE2 due to LLVM PR6960. */
-   if (!util_cpu_caps.has_sse2)
-       return NULL;
-#endif
-
    llvm = CALLOC_STRUCT( draw_llvm );
    if (!llvm)
       return NULL;
@@ -238,9 +251,22 @@ draw_llvm_create(struct draw_context *draw)
       /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
        * but there are more on SVN. */
       /* TODO: Add more passes */
+
       LLVMAddCFGSimplificationPass(llvm->pass);
-      LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
-      LLVMAddConstantPropagationPass(llvm->pass);
+
+      if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
+         /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
+          * avoid generating bad code.
+          * Test with piglit glsl-vs-sqrt-zero test.
+          */
+         LLVMAddConstantPropagationPass(llvm->pass);
+         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+      }
+      else {
+         LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+         LLVMAddConstantPropagationPass(llvm->pass);
+      }
+
       if(util_cpu_caps.has_sse4_1) {
          /* FIXME: There is a bug in this pass, whereby the combination of fptosi
           * and sitofp (necessary for trunc/floor/ceil/round implementation)
@@ -277,15 +303,23 @@ draw_llvm_destroy(struct draw_llvm *llvm)
 }
 
 struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+                        unsigned num_inputs,
+                        const struct draw_llvm_variant_key *key)
 {
-   struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+   struct draw_llvm_variant *variant;
    struct llvm_vertex_shader *shader =
       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
 
+   variant = MALLOC(sizeof *variant +
+                   shader->variant_key_size -
+                   sizeof variant->key);
+   if (variant == NULL)
+      return NULL;
+
    variant->llvm = llvm;
 
-   draw_llvm_make_variant_key(llvm, &variant->key);
+   memcpy(&variant->key, key, shader->variant_key_size);
 
    llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
 
@@ -307,11 +341,12 @@ generate_vs(struct draw_llvm *llvm,
             LLVMValueRef (*outputs)[NUM_CHANNELS],
             const LLVMValueRef (*inputs)[NUM_CHANNELS],
             LLVMValueRef context_ptr,
-            struct lp_build_sampler_soa *sampler)
+            struct lp_build_sampler_soa *draw_sampler)
 {
    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
    struct lp_type vs_type;
    LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+   struct lp_build_sampler_soa *sampler = 0;
 
    memset(&vs_type, 0, sizeof vs_type);
    vs_type.floating = TRUE; /* floating point values */
@@ -327,6 +362,10 @@ generate_vs(struct draw_llvm *llvm,
       tgsi_dump(tokens, 0);
    }
 
+   if (llvm->draw->num_sampler_views &&
+       llvm->draw->num_samplers)
+      sampler = draw_sampler;
+
    lp_build_tgsi_soa(builder,
                      tokens,
                      vs_type,
@@ -363,7 +402,8 @@ generate_fetch(LLVMBuilderRef builder,
                LLVMValueRef *res,
                struct pipe_vertex_element *velem,
                LLVMValueRef vbuf,
-               LLVMValueRef index)
+               LLVMValueRef index,
+               LLVMValueRef instance_id)
 {
    LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
    LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
@@ -374,8 +414,15 @@ generate_fetch(LLVMBuilderRef builder,
    LLVMValueRef cond;
    LLVMValueRef stride;
 
-   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+   if (velem->instance_divisor) {
+      /* array index = instance_id / instance_divisor */
+      index = LLVMBuildUDiv(builder, instance_id,
+                            LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
+                            "instance_divisor");
+   }
 
+   /* limit index to min(index, vb_max_index) */
+   cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
    index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
 
    stride = LLVMBuildMul(builder, vb_stride, index, "");
@@ -643,18 +690,18 @@ convert_to_aos(LLVMBuilderRef builder,
 static void
 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 {
-   LLVMTypeRef arg_types[7];
+   LLVMTypeRef arg_types[8];
    LLVMTypeRef func_type;
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef start, end, count, stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+   LLVMValueRef instance_id;
    struct draw_context *draw = llvm->draw;
    unsigned i, j;
    struct lp_build_context bld;
    struct lp_build_loop_state lp_loop;
-   struct lp_type vs_type = lp_type_float_vec(32);
    const int max_vertices = 4;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    void *code;
@@ -667,6 +714,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    arg_types[4] = LLVMInt32Type();                  /* count */
    arg_types[5] = LLVMInt32Type();                  /* stride */
    arg_types[6] = llvm->vb_ptr_type;                /* pipe_vertex_buffer's */
+   arg_types[7] = LLVMInt32Type();                  /* instance_id */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
@@ -683,6 +731,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    count        = LLVMGetParam(variant->function, 4);
    stride       = LLVMGetParam(variant->function, 5);
    vb_ptr       = LLVMGetParam(variant->function, 6);
+   instance_id  = LLVMGetParam(variant->function, 7);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
@@ -691,6 +740,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    lp_build_name(count, "count");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
+   lp_build_name(instance_id, "instance_id");
 
    /*
     * Function body
@@ -700,15 +750,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   lp_build_context_init(&bld, builder, vs_type);
+   lp_build_context_init(&bld, builder, lp_type_int(32));
 
    end = lp_build_add(&bld, start, count);
 
    step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
 
    /* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
-                                          context_ptr);
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
 
 #if DEBUG_STORE
    lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
@@ -740,7 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
                                            &vb_index, 1, "");
             generate_fetch(builder, vbuffers_ptr,
-                           &aos_attribs[j][i], velem, vb, true_index);
+                           &aos_attribs[j][i], velem, vb, true_index,
+                           instance_id);
          }
       }
       convert_to_soa(builder, aos_attribs, inputs,
@@ -762,6 +814,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 
    sampler->destroy(sampler);
 
+#ifdef PIPE_ARCH_X86
+   /* Avoid corrupting the FPU stack on 32bit OSes. */
+   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
@@ -789,25 +846,25 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
       lp_disassemble(code);
    }
+   lp_func_delete_body(variant->function);
 }
 
 
 static void
 draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 {
-   LLVMTypeRef arg_types[7];
+   LLVMTypeRef arg_types[8];
    LLVMTypeRef func_type;
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+   LLVMValueRef instance_id;
    struct draw_context *draw = llvm->draw;
    unsigned i, j;
    struct lp_build_context bld;
-   struct lp_build_context bld_int;
    struct lp_build_loop_state lp_loop;
-   struct lp_type vs_type = lp_type_float_vec(32);
    const int max_vertices = 4;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
    LLVMValueRef fetch_max;
@@ -821,14 +878,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    arg_types[4] = LLVMInt32Type();                      /* fetch_count */
    arg_types[5] = LLVMInt32Type();                      /* stride */
    arg_types[6] = llvm->vb_ptr_type;                    /* pipe_vertex_buffer's */
+   arg_types[7] = LLVMInt32Type();                      /* instance_id */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
-   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+   variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
+                                            func_type);
    LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
    for(i = 0; i < Elements(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+         LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+                          LLVMNoAliasAttribute);
 
    context_ptr  = LLVMGetParam(variant->function_elts, 0);
    io_ptr       = LLVMGetParam(variant->function_elts, 1);
@@ -837,6 +897,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    fetch_count  = LLVMGetParam(variant->function_elts, 4);
    stride       = LLVMGetParam(variant->function_elts, 5);
    vb_ptr       = LLVMGetParam(variant->function_elts, 6);
+   instance_id  = LLVMGetParam(variant->function_elts, 7);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
@@ -845,6 +906,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    lp_build_name(fetch_count, "fetch_count");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
+   lp_build_name(instance_id, "instance_id");
 
    /*
     * Function body
@@ -854,14 +916,14 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   lp_build_context_init(&bld, builder, vs_type);
-   lp_build_context_init(&bld_int, builder, lp_type_int(32));
+   lp_build_context_init(&bld, builder, lp_type_int(32));
 
    step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
 
    /* code generated texture sampling */
-   sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
-                                          context_ptr);
+   sampler = draw_llvm_sampler_soa_create(
+      draw_llvm_variant_key_samplers(&variant->key),
+      context_ptr);
 
    fetch_max = LLVMBuildSub(builder, fetch_count,
                             LLVMConstInt(LLVMInt32Type(), 1, 0),
@@ -890,7 +952,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
          /* make sure we're not out of bounds which can happen
           * if fetch_count % 4 != 0, because on the last iteration
           * a few of the 4 vertex fetches will be out of bounds */
-         true_index = lp_build_min(&bld_int, true_index, fetch_max);
+         true_index = lp_build_min(&bld, true_index, fetch_max);
 
          fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
                                   &true_index, 1, "");
@@ -903,7 +965,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
                                            &vb_index, 1, "");
             generate_fetch(builder, vbuffers_ptr,
-                           &aos_attribs[j][i], velem, vb, true_index);
+                           &aos_attribs[j][i], velem, vb, true_index,
+                           instance_id);
          }
       }
       convert_to_soa(builder, aos_attribs, inputs,
@@ -925,6 +988,11 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
 
    sampler->destroy(sampler);
 
+#ifdef PIPE_ARCH_X86
+   /* Avoid corrupting the FPU stack on 32bit OSes. */
+   lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
+#endif
+
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
@@ -952,33 +1020,45 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
       lp_disassemble(code);
    }
+   lp_func_delete_body(variant->function_elts);
 }
 
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
-                           struct draw_llvm_variant_key *key)
+/**
+ * Build the variant key for the currently bound vertex shader in
+ * caller-provided storage.
+ *
+ * \param llvm   draw LLVM state; the bound vertex elements, vertex shader
+ *               info, sampler views and samplers are read from llvm->draw.
+ * \param store  raw buffer that is reinterpreted as the key and written in
+ *               place.  Presumably it must be large enough for the key
+ *               header plus nr_vertex_elements vertex elements and
+ *               nr_samplers sampler states - TODO confirm against callers.
+ * \return       \p store, cast to a draw_llvm_variant_key pointer.
+ *
+ * The function fills in nr_vertex_elements and nr_samplers, copies the
+ * bound vertex elements, zeroes the sampler static-state array and then
+ * fills one entry per sampler slot through lp_sampler_static_state().
+ *
+ * NOTE(review): unlike the code being removed, the key is no longer
+ * zeroed as a whole; only the sampler array is cleared.  If keys are
+ * compared bytewise, confirm that no padding or unused field is left
+ * uninitialized.
+ */
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
 {
    unsigned i;
+   struct draw_llvm_variant_key *key;
+   struct lp_sampler_static_state *sampler;
 
-   memset(key, 0, sizeof(struct draw_llvm_variant_key));
+   key = (struct draw_llvm_variant_key *)store;
 
+   /* Presumably all variants of the shader should have the same
+    * number of vertex elements - ie the number of shader inputs.
+    */
    key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
 
+   /* All variants of this shader will have the same value for
+    * nr_samplers.  Not yet trying to compact away holes in the
+    * sampler array.
+    */
+   key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+   sampler = draw_llvm_variant_key_samplers(key);
+
    memcpy(key->vertex_element,
           llvm->draw->pt.vertex_element,
           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+   
+   memset(sampler, 0, key->nr_samplers * sizeof *sampler);
 
-   memcpy(&key->vs,
-          &llvm->draw->vs.vertex_shader->state,
-          sizeof(struct pipe_shader_state));
-
-   for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
-      struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
-      if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
-         lp_sampler_static_state(&key->sampler[i],
-                                 llvm->draw->sampler_views[i],
-                                 llvm->draw->samplers[i]);
+   for (i = 0 ; i < key->nr_samplers; i++) {
+      lp_sampler_static_state(&sampler[i],
+                             llvm->draw->sampler_views[i],
+                             llvm->draw->samplers[i]);
    }
+
+   return key;
 }
 
 void
@@ -986,14 +1066,14 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
                              unsigned sampler_idx,
                              uint32_t width, uint32_t height, uint32_t depth,
                              uint32_t last_level,
-                             uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
-                             uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
-                             const void *data[DRAW_MAX_TEXTURE_LEVELS])
+                             uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+                             uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+                             const void *data[PIPE_MAX_TEXTURE_LEVELS])
 {
    unsigned j;
    struct draw_jit_texture *jit_tex;
 
-   assert(sampler_idx <= PIPE_MAX_VERTEX_SAMPLERS);
+   assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
 
 
    jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
@@ -1010,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
    }
 }
 
+/**
+ * Copy the dynamic sampler parameters of the bound samplers into the
+ * JIT context so generated shader code can read them.
+ *
+ * For every slot below draw->num_samplers that has a non-NULL sampler,
+ * min_lod, max_lod, lod_bias and the four border_color components are
+ * copied into the matching jit_context.textures[] entry.  Slots whose
+ * sampler is NULL are left untouched.
+ *
+ * \param draw  draw context; its llvm->jit_context is updated in place.
+ *
+ * NOTE(review): textures[] is indexed by sampler slot with no bound check
+ * visible here - presumably num_samplers never exceeds the array size;
+ * confirm against the code that sets it.
+ */
+void
+draw_llvm_set_sampler_state(struct draw_context *draw)
+{
+   unsigned i;
+
+   for (i = 0; i < draw->num_samplers; i++) {
+      struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
+
+      if (draw->samplers[i]) {
+         jit_tex->min_lod = draw->samplers[i]->min_lod;
+         jit_tex->max_lod = draw->samplers[i]->max_lod;
+         jit_tex->lod_bias = draw->samplers[i]->lod_bias;
+         COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+      }
+   }
+}
+
+
 void
 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
 {