gallium/draw: initial code to properly support llvm in the draw module
authorZack Rusin <zackr@vmware.com>
Tue, 23 Feb 2010 03:02:58 +0000 (22:02 -0500)
committerZack Rusin <zackr@vmware.com>
Tue, 23 Feb 2010 03:02:58 +0000 (22:02 -0500)
code generate big chunks of the vertex pipeline in order to speed up
software vertex processing.

12 files changed:
src/gallium/auxiliary/SConscript
src/gallium/auxiliary/draw/draw_context.c
src/gallium/auxiliary/draw/draw_context.h
src/gallium/auxiliary/draw/draw_llvm.c [new file with mode: 0644]
src/gallium/auxiliary/draw/draw_llvm.h [new file with mode: 0644]
src/gallium/auxiliary/draw/draw_llvm_translate.c [new file with mode: 0644]
src/gallium/auxiliary/draw/draw_private.h
src/gallium/auxiliary/draw/draw_pt.c
src/gallium/auxiliary/draw/draw_pt.h
src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c [new file with mode: 0644]
src/gallium/auxiliary/draw/draw_vs_llvm.c
src/gallium/drivers/llvmpipe/lp_context.c

index b531ad2dbd9f8c48bb47b661fe293b877944df3d..51c4a0cbbe3e1bc2c81b480b44d26e5ddc63b166 100644 (file)
@@ -194,6 +194,8 @@ if drawllvm:
     'gallivm/lp_bld_swizzle.c',
     'gallivm/lp_bld_tgsi_soa.c',
     'gallivm/lp_bld_type.c',
+    'draw/draw_llvm.c',
+    'draw/draw_pt_fetch_shade_pipeline_llvm.c'
     ]
 
 gallium = env.ConvenienceLibrary(
index d5ddc4a6a922c25cf1bb008f6284c601e9de4522..6fa73ad56ba67b0592527a2313e23eccd58f1e4c 100644 (file)
@@ -44,6 +44,18 @@ struct draw_context *draw_create( void )
    if (draw == NULL)
       goto fail;
 
+   if (!draw_init(draw))
+      goto fail;
+
+   return draw;
+
+fail:
+   draw_destroy( draw );
+   return NULL;
+}
+
+boolean draw_init(struct draw_context *draw)
+{
    ASSIGN_4V( draw->plane[0], -1,  0,  0, 1 );
    ASSIGN_4V( draw->plane[1],  1,  0,  0, 1 );
    ASSIGN_4V( draw->plane[2],  0, -1,  0, 1 );
@@ -57,22 +69,18 @@ struct draw_context *draw_create( void )
 
 
    if (!draw_pipeline_init( draw ))
-      goto fail;
+      return FALSE;
 
    if (!draw_pt_init( draw ))
-      goto fail;
+      return FALSE;
 
    if (!draw_vs_init( draw ))
-      goto fail;
+      return FALSE;
 
    if (!draw_gs_init( draw ))
-      goto fail;
+      return FALSE;
 
-   return draw;
-
-fail:
-   draw_destroy( draw );   
-   return NULL;
+   return TRUE;
 }
 
 
index acd81b9712d89a18969b6eeca5bb003f51af5ca2..d42e400318317b55dfac35d4697fa06042f37ef0 100644 (file)
@@ -40,6 +40,9 @@
 
 #include "pipe/p_state.h"
 
+#ifdef DRAW_LLVM
+#include <llvm-c/ExecutionEngine.h>
+#endif
 
 struct pipe_context;
 struct draw_context;
@@ -197,6 +200,11 @@ boolean draw_need_pipeline(const struct draw_context *draw,
                            const struct pipe_rasterizer_state *rasterizer,
                            unsigned prim );
 
-
+#ifdef DRAW_LLVM
+/*******************************************************************************
+ * LLVM integration
+ */
+struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine);
+#endif
 
 #endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
new file mode 100644 (file)
index 0000000..6b0ddfd
--- /dev/null
@@ -0,0 +1,311 @@
+#include "draw_llvm.h"
+
+#include "draw_context.h"
+#include "draw_vs.h"
+
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_interp.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "util/u_cpu_detect.h"
+
+#include <llvm-c/Transforms/Scalar.h>
+
+/**
+ * Build the LLVM struct types that mirror the C structures shared with the
+ * generated code.
+ *
+ * Three types are created: vertex_header, draw_jit_texture ("texture") and
+ * draw_jit_context ("context").  Each one is run through the
+ * LP_CHECK_MEMBER_OFFSET / LP_CHECK_STRUCT_SIZE macros against its C
+ * counterpart and registered by name in llvm->module.  Pointer types to the
+ * vertex header and to the context are stored in
+ * llvm->vertex_header_ptr_type and llvm->context_ptr_type.
+ *
+ * \param llvm  state whose module/target are read and whose pointer types
+ *              are filled in
+ */
+static void
+init_globals(struct draw_llvm *llvm)
+{
+    LLVMTypeRef vertex_header;
+    LLVMTypeRef texture_type;
+
+   /* struct vertex_header */
+   {
+      LLVMTypeRef elem_types[3];
+
+      /* one 32-bit word, float[4], then a zero-length array of float[4] */
+      elem_types[0]  = LLVMIntType(32);
+      elem_types[1]  = LLVMArrayType(LLVMFloatType(), 4);
+      elem_types[2]  = LLVMArrayType(elem_types[1], 0);
+
+      vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+      /* these are bit-fields and we can't take address of them
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_CLIPMASK);
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_EDGEFLAG);
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_PAD);
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_VERTEX_ID);
+      */
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_CLIP);
+      LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
+                             llvm->target, vertex_header,
+                             DRAW_JIT_VERTEX_DATA);
+
+      LP_CHECK_STRUCT_SIZE(struct vertex_header,
+                           llvm->target, vertex_header);
+
+      LLVMAddTypeName(llvm->module, "vertex_header", vertex_header);
+
+      llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
+   }
+      /* struct draw_jit_texture */
+   {
+      LLVMTypeRef elem_types[4];
+
+      elem_types[DRAW_JIT_TEXTURE_WIDTH]  = LLVMInt32Type();
+      elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+      elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+      elem_types[DRAW_JIT_TEXTURE_DATA]   = LLVMPointerType(LLVMInt8Type(), 0);
+
+      texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_WIDTH);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_HEIGHT);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_STRIDE);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
+                             llvm->target, texture_type,
+                             DRAW_JIT_TEXTURE_DATA);
+      LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
+                           llvm->target, texture_type);
+
+      LLVMAddTypeName(llvm->module, "texture", texture_type);
+   }
+
+
+   /* struct draw_jit_context */
+   {
+      LLVMTypeRef elem_types[3];
+      LLVMTypeRef context_type;
+
+      elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
+      elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
+      elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+
+      context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
+                             llvm->target, context_type, 0);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
+                             llvm->target, context_type, 1);
+      LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
+                             llvm->target, context_type,
+                             DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+      LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
+                           llvm->target, context_type);
+
+      LLVMAddTypeName(llvm->module, "context", context_type);
+
+      llvm->context_ptr_type = LLVMPointerType(context_type, 0);
+   }
+}
+
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw)
+{
+   struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+
+   util_cpu_detect();
+
+   llvm->draw = draw;
+   llvm->engine = draw->engine;
+
+   debug_assert(llvm->engine);
+
+   llvm->module = LLVMModuleCreateWithName("draw_llvm");
+   llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
+
+   LLVMAddModuleProvider(llvm->engine, llvm->provider);
+
+   llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
+
+   llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
+   LLVMAddTargetData(llvm->target, llvm->pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   /* TODO: Add more passes */
+   LLVMAddConstantPropagationPass(llvm->pass);
+   if(util_cpu_caps.has_sse4_1) {
+      /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+       * and sitofp (necessary for trunc/floor/ceil/round implementation)
+       * somehow becomes invalid code.
+       */
+      LLVMAddInstructionCombiningPass(llvm->pass);
+   }
+   LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+   LLVMAddGVNPass(llvm->pass);
+   LLVMAddCFGSimplificationPass(llvm->pass);
+
+   init_globals(llvm);
+
+
+#if 1
+   LLVMDumpModule(llvm->module);
+#endif
+
+   return llvm;
+}
+
+/**
+ * Release a draw_llvm obtained from draw_llvm_create().
+ *
+ * \param llvm  object to free
+ */
+void
+draw_llvm_destroy(struct draw_llvm *llvm)
+{
+   /* The object was allocated with CALLOC_STRUCT, so it has to be released
+    * through the matching FREE wrapper rather than libc free(). */
+   FREE(llvm);
+}
+
+/**
+ * Placeholder: currently does nothing with \p llvm.
+ */
+void
+draw_llvm_prepare(struct draw_llvm *llvm)
+{
+}
+
+
+/**
+ * Create a draw context that uses the given LLVM execution engine.
+ *
+ * The engine is stored in the context before draw_init() runs.
+ *
+ * \param engine  execution engine to record in the new context
+ * \return the initialised context, or NULL when allocation or
+ *         draw_init() fails (draw_destroy() is called in that case)
+ */
+struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine)
+{
+   struct draw_context *ctx = CALLOC_STRUCT( draw_context );
+
+   if (ctx != NULL) {
+      ctx->engine = engine;
+
+      if (draw_init(ctx))
+         return ctx;
+   }
+
+   /* Shared failure path: allocation failed or initialisation failed. */
+   draw_destroy( ctx );
+   return NULL;
+}
+
+/**
+ * Emit the code for the vertex shader currently bound to the draw context.
+ *
+ * \param llvm         draw llvm state; the TGSI tokens are read from
+ *                     llvm->draw->vs.vertex_shader
+ * \param builder      builder positioned where the shader code should go
+ * \param context_ptr  pointer to the draw_jit_context function argument
+ * \param io           pointer to the vertex being processed (not used yet)
+ */
+static void
+generate_vs(struct draw_llvm *llvm,
+            LLVMBuilderRef builder,
+            LLVMValueRef context_ptr,
+            LLVMValueRef io)
+{
+   const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
+   struct lp_type vs_type = lp_type_float(32);
+   /* The constants pointer used to be handed on uninitialised; read it from
+    * the jit context instead. */
+   LLVMValueRef vs_consts = draw_jit_context_vs_constants(builder, context_ptr);
+   /* NOTE(review): nothing in this function provides storage for the shader
+    * inputs/outputs yet.  They are set to NULL so that an attempt to use
+    * them fails deterministically rather than through an indeterminate
+    * pointer -- TODO wire these up to the fetched vertex data and to io. */
+   const LLVMValueRef (*inputs)[NUM_CHANNELS] = NULL;
+   LLVMValueRef (*outputs)[NUM_CHANNELS] = NULL;
+
+   (void) io;
+
+   lp_build_tgsi_soa(builder,
+                     tokens,
+                     vs_type,
+                     NULL /*struct lp_build_mask_context *mask*/,
+                     vs_consts,
+                     NULL /*pos*/,
+                     inputs,
+                     outputs,
+                     NULL/*sampler*/);
+}
+
+void
+draw_llvm_generate(struct draw_llvm *llvm)
+{
+   LLVMTypeRef arg_types[5];
+   LLVMTypeRef func_type;
+   LLVMValueRef context_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef function;
+   LLVMValueRef start, end, count, stride, step;
+   LLVMValueRef io_ptr;
+   unsigned i;
+   unsigned chan;
+   struct lp_build_context bld;
+   struct lp_build_loop_state lp_loop;
+   struct lp_type vs_type = lp_type_float(32);
+
+   arg_types[0] = llvm->context_ptr_type;           /* context */
+   arg_types[1] = llvm->vertex_header_ptr_type;     /* vertex_header */
+   arg_types[2] = LLVMInt32Type();                  /* start */
+   arg_types[3] = LLVMInt32Type();                  /* count */
+   arg_types[4] = LLVMInt32Type();                  /* stride */
+
+   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+   function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
+   LLVMSetFunctionCallConv(function, LLVMCCallConv);
+   for(i = 0; i < Elements(arg_types); ++i)
+      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+         LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+
+   context_ptr  = LLVMGetParam(function, 0);
+   io_ptr       = LLVMGetParam(function, 1);
+   start        = LLVMGetParam(function, 2);
+   count        = LLVMGetParam(function, 3);
+   stride       = LLVMGetParam(function, 4);
+
+   lp_build_name(context_ptr, "context");
+   lp_build_name(io_ptr, "io");
+   lp_build_name(start, "start");
+   lp_build_name(count, "count");
+   lp_build_name(stride, "stride");
+
+   /*
+    * Function body
+    */
+
+   block = LLVMAppendBasicBlock(function, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   lp_build_context_init(&bld, builder, vs_type);
+
+   end = lp_build_add(&bld, start, count);
+
+   step = LLVMConstInt(LLVMInt32Type(), 1, 0);
+   lp_build_loop_begin(builder, start, &lp_loop);
+   {
+      LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");
+
+      generate_vs(llvm,
+                  builder,
+                  context_ptr,
+                  io);
+   }
+   lp_build_loop_end(builder, end, step, &lp_loop);
+
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+
+   /*
+    * Translate the LLVM IR into machine code.
+    */
+
+#ifdef DEBUG
+   if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
+      LLVMDumpValue(function);
+      assert(0);
+   }
+#endif
+
+   LLVMRunFunctionPassManager(llvm->pass, function);
+
+   if (1) {
+      LLVMDumpValue(function);
+      debug_printf("\n");
+   }
+
+   llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function);
+
+   if (1)
+      lp_disassemble(llvm->jit_func);
+}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
new file mode 100644 (file)
index 0000000..0a1845f
--- /dev/null
@@ -0,0 +1,154 @@
+#ifndef DRAW_LLVM_H
+#define DRAW_LLVM_H
+
+#include "draw/draw_private.h"
+
+#include "pipe/p_context.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/Analysis.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/ExecutionEngine.h>
+
+/**
+ * Texture description in the layout shared with the generated code.
+ *
+ * The member order must match the DRAW_JIT_TEXTURE_* indices, which are
+ * used when the equivalent LLVM struct type is built.
+ */
+struct draw_jit_texture
+{
+   uint32_t width;
+   uint32_t height;
+   uint32_t stride;
+   const void *data;
+};
+
+/** Member indices of struct draw_jit_texture, in declaration order. */
+enum {
+   DRAW_JIT_TEXTURE_WIDTH = 0,
+   DRAW_JIT_TEXTURE_HEIGHT,
+   DRAW_JIT_TEXTURE_STRIDE,
+   DRAW_JIT_TEXTURE_DATA
+};
+
+/**
+ * Element indices of the LLVM-side vertex_header struct type:
+ * the leading 32-bit word, then clip, then data.
+ */
+enum {
+   DRAW_JIT_VERTEX_VERTEX_ID = 0,
+   DRAW_JIT_VERTEX_CLIP,
+   DRAW_JIT_VERTEX_DATA
+};
+
+/**
+ * This structure is passed directly to the generated vertex shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the draw_jit_context_* macros.
+ * Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct draw_jit_context
+{
+   const float *vs_constants;   /* struct member index 0 */
+   const float *gs_constants;   /* struct member index 1 */
+
+
+   /* struct member index DRAW_JIT_CONTEXT_TEXTURES_INDEX */
+   struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+};
+
+
+/*
+ * Accessors for the members of a draw_jit_context pointer (_ptr) inside
+ * generated code.  The numeric indices follow the member order of
+ * struct draw_jit_context.
+ */
+#define draw_jit_context_vs_constants(_builder, _ptr) \
+   lp_build_struct_get(_builder, _ptr, 0, "vs_constants")
+
+#define draw_jit_context_gs_constants(_builder, _ptr) \
+   lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
+
+#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+
+/* textures is an array member, so this one uses the _ptr variant */
+#define draw_jit_context_textures(_builder, _ptr) \
+   lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
+/* we are constructing a function of the form:
+
+struct vertex_header {
+   uint32 vertex_id;
+
+   float clip[4];
+   float data[][4];
+};
+
+struct draw_jit_context
+{
+   const float *vs_constants;
+   const float *gs_constants;
+
+   struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+   const void *vbuffers;
+};
+
+void
+draw_shader(struct draw_jit_context *context,
+            struct vertex_header *io,
+            unsigned start,
+            unsigned count,
+            unsigned stride)
+{
+  // do a fetch and a run vertex shader
+  for (int i = 0; i < count; ++i) {
+    struct vertex_header *header = &io[i];
+    header->vertex_id = 0xffff;
+    // follows code-generated fetch/translate section
+    // for each vertex_element ...
+    codegened_translate(header->data[num_element],
+                       context->vertex_elements[num_element],
+                       context->vertex_buffers,
+                       context->vbuffers);
+
+    codegened_vertex_shader(header->data, context->vs_constants);
+  }
+
+  for (int i = 0; i < count; i += context->primitive_size) {
+     struct vertex_header *prim[MAX_PRIMITIVE_SIZE];
+     for (int j = 0; j < context->primitive_size; ++j) {
+       prim[j] = &io[i + j];
+     }
+     codegened_geometry_shader(prim, gs_constants);
+  }
+}
+*/
+
+/**
+ * Signature of the JIT-compiled function; draw_llvm_generate() stores the
+ * compiled entry point in a pointer of this type.
+ */
+typedef void
+(*draw_jit_vert_func)(struct draw_jit_context *context,
+                      struct vertex_header *io,
+                      unsigned start,
+                      unsigned count,
+                      unsigned stride);
+
+/**
+ * Per-draw-context state of the LLVM code generator.
+ */
+struct draw_llvm {
+   struct draw_context *draw;          /* owning draw context */
+
+   struct draw_jit_context jit_context;
+
+   draw_jit_vert_func jit_func;        /* set by draw_llvm_generate() */
+
+   LLVMModuleRef module;               /* module named "draw_llvm" */
+   LLVMExecutionEngineRef engine;      /* copied from draw->engine */
+   LLVMModuleProviderRef provider;     /* provider wrapping module */
+   LLVMTargetDataRef target;           /* target data of engine */
+   LLVMPassManagerRef pass;            /* function pass manager */
+
+   /* pointer types to the LLVM mirrors of the shared C structs */
+   LLVMTypeRef context_ptr_type;
+   LLVMTypeRef vertex_header_ptr_type;
+};
+
+
+/* allocates a draw_llvm bound to draw and its execution engine */
+struct draw_llvm *
+draw_llvm_create(struct draw_context *draw);
+
+/* frees an object returned by draw_llvm_create */
+void
+draw_llvm_destroy(struct draw_llvm *llvm);
+
+/* currently a no-op */
+void
+draw_llvm_prepare(struct draw_llvm *llvm);
+
+/* generates the draw jit function */
+void
+draw_llvm_generate(struct draw_llvm *llvm);
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
new file mode 100644 (file)
index 0000000..588e97b
--- /dev/null
@@ -0,0 +1,653 @@
+
+
+
+#include "util/u_memory.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*emit_func)(const float *attrib, void *ptr);
+
+
+
+/**
+ * Generic translate object.
+ *
+ * The base struct translate comes first so that a struct translate pointer
+ * can be cast to this type.  attrib[] holds, per attribute, a fetch
+ * function with its input description and an emit function with its output
+ * offset; nr_attrib is the number of entries.
+ */
+struct translate_generic {
+   struct translate translate;
+
+   struct {
+      enum translate_element_type type;
+
+      fetch_func fetch;
+      unsigned buffer;
+      unsigned input_offset;
+      unsigned instance_divisor;
+
+      emit_func emit;
+      unsigned output_offset;
+
+      char *input_ptr;
+      unsigned input_stride;
+
+   } attrib[PIPE_MAX_ATTRIBS];
+
+   unsigned nr_attrib;
+};
+
+
+/**
+ * Downcast a struct translate pointer to the generic implementation.
+ * Valid because struct translate is the first member of
+ * struct translate_generic.
+ */
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+   struct translate_generic *generic = (struct translate_generic *)translate;
+
+   return generic;
+}
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/duplicated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define ATTRIB( NAME, SZ, TYPE, FROM, TO )             \
+static void                                            \
+fetch_##NAME(const void *ptr, float *attrib)           \
+{                                                      \
+   const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f };  \
+   unsigned i;                                         \
+                                                       \
+   for (i = 0; i < SZ; i++) {                          \
+      attrib[i] = FROM(i);                             \
+   }                                                   \
+                                                       \
+   for (; i < 4; i++) {                                        \
+      attrib[i] = defaults[i];                         \
+   }                                                   \
+}                                                      \
+                                                       \
+static void                                            \
+emit_##NAME(const float *attrib, void *ptr)            \
+{  \
+   unsigned i;                                         \
+   TYPE *out = (TYPE *)ptr;                            \
+                                                       \
+   for (i = 0; i < SZ; i++) {                          \
+      out[i] = TO(attrib[i]);                          \
+   }                                                   \
+}
+
+{
+
+   return conv = instr(builder, bc, "");
+}
+
+/**
+ * Emit IR that loads a 64-bit float through \p val and narrows it to a
+ * 32-bit float.
+ */
+static INLINE LLVMValueRef
+from_64_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* val is loaded from, so it must be cast to a pointer to the element
+    * type rather than to the element type itself. */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMDoubleType(), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   return LLVMBuildFPTrunc(builder, l, LLVMFloatType(), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit float through \p val.
+ */
+static INLINE LLVMValueRef
+from_32_float(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to float*, not float: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMFloatType(), 0), "");
+   return LLVMBuildLoad(builder, bc, "");
+}
+
+/**
+ * Emit IR that loads the value behind \p val and converts it, as an
+ * unsigned integer, to float.
+ */
+static INLINE LLVMValueRef
+from_8_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef raw = LLVMBuildLoad(builder, val, "");
+   LLVMTypeRef float_type = LLVMFloatType();
+
+   return LLVMBuildUIToFP(builder, raw, float_type, "");
+}
+
+/**
+ * Emit IR that loads a 16-bit unsigned integer through \p val and converts
+ * it to float.
+ */
+static INLINE LLVMValueRef
+from_16_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i16*, not i16: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(16), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit unsigned integer through \p val and converts
+ * it to float.
+ */
+static INLINE LLVMValueRef
+from_32_uscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i32*, not i32: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(32), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   return LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+}
+
+/**
+ * Emit IR that loads the value behind \p val and converts it, as a signed
+ * integer, to float.
+ */
+static INLINE LLVMValueRef
+from_8_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef raw = LLVMBuildLoad(builder, val, "");
+   LLVMTypeRef float_type = LLVMFloatType();
+
+   return LLVMBuildSIToFP(builder, raw, float_type, "");
+}
+
+/**
+ * Emit IR that loads a 16-bit signed integer through \p val and converts
+ * it to float.
+ */
+static INLINE LLVMValueRef
+from_16_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i16*, not i16: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(16), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit signed integer through \p val and converts
+ * it to float.
+ */
+static INLINE LLVMValueRef
+from_32_sscaled(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i32*, not i32: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(32), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   return LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+}
+
+
+/**
+ * Emit IR that loads the value behind \p val, converts it as an unsigned
+ * integer to float and divides it by 255.
+ */
+static INLINE LLVMValueRef
+from_8_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+   LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+   /* LLVMConstReal takes the constant's type (not the builder) and
+    * LLVMBuildFDiv needs a trailing name argument. */
+   return LLVMBuildFDiv(builder, uscaled,
+                        LLVMConstReal(LLVMFloatType(), 255.), "");
+}
+
+/**
+ * Emit IR that loads a 16-bit unsigned integer through \p val, converts it
+ * to float and divides it by 65535.
+ */
+static INLINE LLVMValueRef
+from_16_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i16*, not i16: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(16), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+   /* constant needs a type, fdiv needs a name */
+   return LLVMBuildFDiv(builder, uscaled,
+                        LLVMConstReal(LLVMFloatType(), 65535.), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit unsigned integer through \p val, converts it
+ * to float and divides it by 4294967295.
+ */
+static INLINE LLVMValueRef
+from_32_unorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i32*, not i32: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(32), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   LLVMValueRef uscaled = LLVMBuildUIToFP(builder, l, LLVMFloatType(), "");
+
+   /* constant needs a type, fdiv needs a name */
+   return LLVMBuildFDiv(builder, uscaled,
+                        LLVMConstReal(LLVMFloatType(), 4294967295.), "");
+}
+
+/**
+ * Emit IR that loads the value behind \p val, converts it as a signed
+ * integer to float and divides it by 127.
+ */
+static INLINE LLVMValueRef
+from_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+   LLVMValueRef sscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+   /* LLVMConstReal takes the constant's type (not the builder) and
+    * LLVMBuildFDiv needs a trailing name argument. */
+   return LLVMBuildFDiv(builder, sscaled,
+                        LLVMConstReal(LLVMFloatType(), 127.0), "");
+}
+
+/**
+ * Emit IR that loads a 16-bit signed integer through \p val, converts it
+ * to float and divides it by 32767.
+ */
+static INLINE LLVMValueRef
+from_16_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i16*, not i16: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(16), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   LLVMValueRef sscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+   /* constant needs a type, fdiv needs a name */
+   return LLVMBuildFDiv(builder, sscaled,
+                        LLVMConstReal(LLVMFloatType(), 32767.0f), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit signed integer through \p val, converts it
+ * to float and divides it by 2147483647.
+ */
+static INLINE LLVMValueRef
+from_32_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i32*, not i32: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(32), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   LLVMValueRef sscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+   /* constant needs a type, fdiv needs a name */
+   return LLVMBuildFDiv(builder, sscaled,
+                        LLVMConstReal(LLVMFloatType(), 2147483647.0), "");
+}
+
+/**
+ * Emit IR that loads a 32-bit signed integer through \p val, converts it
+ * to float and divides it by 65536.
+ */
+static INLINE LLVMValueRef
+from_32_fixed(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   /* cast to i32*, not i32: the result is the operand of a load */
+   LLVMValueRef bc = LLVMBuildBitCast(builder, val,
+                                      LLVMPointerType(LLVMIntType(32), 0), "");
+   LLVMValueRef l = LLVMBuildLoad(builder, bc, "");
+   LLVMValueRef sscaled = LLVMBuildSIToFP(builder, l, LLVMFloatType(), "");
+
+   /* constant needs a type, fdiv needs a name */
+   return LLVMBuildFDiv(builder, sscaled,
+                        LLVMConstReal(LLVMFloatType(), 65536.0), "");
+}
+
+/**
+ * Emit IR that loads the value behind \p fp and extends it to a double.
+ */
+static INLINE LLVMValueRef
+to_64_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMDoubleType();
+
+   return LLVMBuildFPExt(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the value behind \p fp; no conversion is applied.
+ */
+static INLINE LLVMValueRef
+to_32_float(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+
+   return loaded;
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to an 8-bit
+ * unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_8_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   return LLVMBuildFPToUI(builder, l, LLVMIntType(8), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to a 16-bit
+ * unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_16_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMIntType(16);
+
+   return LLVMBuildFPToUI(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to a 32-bit
+ * unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_32_uscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMIntType(32);
+
+   return LLVMBuildFPToUI(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to an 8-bit
+ * signed integer.
+ */
+static INLINE LLVMValueRef
+to_8_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMIntType(8);
+
+   return LLVMBuildFPToSI(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to a 16-bit
+ * signed integer.
+ */
+static INLINE LLVMValueRef
+to_16_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMIntType(16);
+
+   return LLVMBuildFPToSI(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp and converts it to a 32-bit
+ * signed integer.
+ */
+static INLINE LLVMValueRef
+to_32_sscaled(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef loaded = LLVMBuildLoad(builder, fp, "");
+   LLVMTypeRef dst_type = LLVMIntType(32);
+
+   return LLVMBuildFPToSI(builder, loaded, dst_type, "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 255 and converts
+ * the result to an 8-bit unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_8_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* Scale while the value is still a float: fmul needs two float operands,
+    * and converting first would drop the fraction before scaling. */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 255.), "");
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(8), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 65535 and
+ * converts the result to a 16-bit unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_16_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* Scale while the value is still a float: fmul needs two float operands,
+    * and converting first would drop the fraction before scaling. */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 65535.), "");
+   /* 16-bit destination, matching the other 16-bit converters */
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(16), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 4294967295 and
+ * converts the result to a 32-bit unsigned integer.
+ */
+static INLINE LLVMValueRef
+to_32_unorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* 4294967295 is not representable as a 32-bit float (it rounds up to
+    * 2^32), so do the scaling in double precision to keep 1.0 in range. */
+   LLVMValueRef wide = LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, wide,
+                                       LLVMConstReal(LLVMDoubleType(), 4294967295.), "");
+
+   return LLVMBuildFPToUI(builder, scaled, LLVMIntType(32), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p val, scales it by 127 and
+ * converts the result to an 8-bit signed integer.
+ */
+static INLINE LLVMValueRef
+to_8_snorm(LLVMBuilderRef builder, LLVMValueRef val)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, val, "");
+   /* Scale while the value is still a float (LLVMBuildFMul, not ...FMUL):
+    * fmul needs two float operands, and converting first would drop the
+    * fraction before scaling. */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 127.0), "");
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(8), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 32767 and
+ * converts the result to a 16-bit signed integer.
+ */
+static INLINE LLVMValueRef
+to_16_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* Scale while the value is still a float: fmul needs two float operands,
+    * and converting first would drop the fraction before scaling. */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 32767.0f), "");
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(16), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 2147483647 and
+ * converts the result to a 32-bit signed integer.
+ */
+static INLINE LLVMValueRef
+to_32_snorm(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* 2147483647 is not representable as a 32-bit float (it rounds up to
+    * 2^31), so do the scaling in double precision to keep 1.0 in range. */
+   LLVMValueRef wide = LLVMBuildFPExt(builder, l, LLVMDoubleType(), "");
+   LLVMValueRef scaled = LLVMBuildFMul(builder, wide,
+                                       LLVMConstReal(LLVMDoubleType(), 2147483647.0), "");
+
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(32), "");
+}
+
+/**
+ * Emit IR that loads the float behind \p fp, scales it by 65536 and
+ * converts the result to a 32-bit signed integer.
+ */
+static INLINE LLVMValueRef
+to_32_fixed(LLVMBuilderRef builder, LLVMValueRef fp)
+{
+   LLVMValueRef l = LLVMBuildLoad(builder, fp, "");
+   /* Scale while the value is still a float: fmul needs two float operands,
+    * and converting first would drop the fractional bits. */
+   LLVMValueRef scaled = LLVMBuildFMul(builder, l,
+                                       LLVMConstReal(LLVMFloatType(), 65536.0), "");
+
+   return LLVMBuildFPToSI(builder, scaled, LLVMIntType(32), "");
+}
+
+/**
+ * Unfinished: for each of nr_components, compute the source address
+ * (ptr advanced by val_size per component) and the destination address
+ * (res indexed by component).  The load/convert/store steps exist only as
+ * comments so far.
+ *
+ * NOTE(review): `builder` is neither a parameter nor a local of this
+ * function, and nothing is returned although the return type is
+ * LLVMValueRef -- both need resolving before this can be used.
+ */
+static LLVMValueRef
+fetch(LLVMValueRef ptr, int val_size, int nr_components,
+     LLVMValueRef res)
+{
+   int i;
+   int offset = 0;
+
+   for (i = 0; i < nr_components; ++i) {
+      LLVMValueRef src_index = LLVMConstInt(LLVMInt32Type(), offset, 0);
+      LLVMValueRef dst_index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      //getelementptr i8* ptr, i64 offset
+      LLVMValueRef src_tmp = LLVMBuildGEP(builder, ptr, &src_index, 1, "");
+      //getelementptr float* res, i64 i
+      LLVMValueRef res_tmp = LLVMBuildGEP(builder, res, &dst_index, 1, "");
+      //bitcast i8* src, to res_type*
+      //load res_type src
+      //convert res_type src to float
+      //store float src, float *dst src
+      offset += val_size;
+   }
+}
+
+
+/**
+ * Fetch four 8-bit unorm components, swapping the first and third:
+ * source components 0,1,2,3 land in attrib[2], attrib[1], attrib[0],
+ * attrib[3].
+ *
+ * NOTE(review): FROM_8_UNORM is not defined in the visible part of this
+ * file -- confirm where it comes from.
+ */
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+   attrib[2] = FROM_8_UNORM(0);
+   attrib[1] = FROM_8_UNORM(1);
+   attrib[0] = FROM_8_UNORM(2);
+   attrib[3] = FROM_8_UNORM(3);
+}
+
+/**
+ * Emit four 8-bit unorm components, swapping the first and third:
+ * attrib[0],[1],[2],[3] are written to out[2], out[1], out[0], out[3].
+ *
+ * NOTE(review): TO_8_UNORM is not defined in the visible part of this
+ * file -- confirm where it comes from.
+ */
+static void
+emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
+{
+   ubyte *out = (ubyte *)ptr;
+   out[2] = TO_8_UNORM(attrib[0]);
+   out[1] = TO_8_UNORM(attrib[1]);
+   out[0] = TO_8_UNORM(attrib[2]);
+   out[3] = TO_8_UNORM(attrib[3]);
+}
+
+/**
+ * Fetch for a missing source: ignores ptr and writes (0, 0, 0, 1).
+ */
+static void
+fetch_NULL( const void *ptr, float *attrib )
+{
+   static const float defaults[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      attrib[i] = defaults[i];
+   }
+}
+
+/**
+ * Emit for a missing destination: intentionally writes nothing.
+ */
+static void
+emit_NULL( const float *attrib, void *ptr )
+{
+   /* doing nothing is the only sensible option */
+}
+
+/* Signatures of the per-format conversion helpers (from_* / to_*) referenced
+ * by the table below: each takes a builder and a value and returns a value.
+ */
+typedef LLVMValueRef (*from_func)(LLVMBuilderRef, LLVMValueRef);
+typedef  LLVMValueRef (*to_func)(LLVMBuilderRef, LLVMValueRef);
+
+/**
+ * Per-format description used by the LLVM translate code.
+ *
+ * One row per supported PIPE_FORMAT_*.  Rows of the same family share the
+ * same converters and component type and differ only in component count.
+ *
+ * NOTE(review): the 'type' column is filled with calls such as
+ * LLVMIntType(32).  Those are not constant expressions, which C requires for
+ * an initializer of an object with static storage duration - confirm how
+ * (and whether) this table is meant to be built, or fill 'type' at runtime.
+ *
+ * NOTE(review): the A8R8G8B8/B8G8R8A8 rows reuse the plain 8-bit unorm
+ * converters and the table has no field describing component order - confirm
+ * that swizzling is handled elsewhere.
+ */
+struct draw_llvm_translate {
+   int format;           /* PIPE_FORMAT_* this row describes */
+   from_func from;       /* from_* converter for one component */
+   to_func to;           /* to_* converter for one component */
+   LLVMTypeRef type;     /* LLVM type of a single component */
+   int num_components;   /* number of components in the format */
+} translates[] =
+{
+   {PIPE_FORMAT_R64_FLOAT,          from_64_float, to_64_float, LLVMDoubleType(), 1},
+   {PIPE_FORMAT_R64G64_FLOAT,       from_64_float, to_64_float, LLVMDoubleType(), 2},
+   {PIPE_FORMAT_R64G64B64_FLOAT,    from_64_float, to_64_float, LLVMDoubleType(), 3},
+   {PIPE_FORMAT_R64G64B64A64_FLOAT, from_64_float, to_64_float, LLVMDoubleType(), 4},
+
+   {PIPE_FORMAT_R32_FLOAT,          from_32_float, to_32_float, LLVMFloatType(), 1},
+   {PIPE_FORMAT_R32G32_FLOAT,       from_32_float, to_32_float, LLVMFloatType(), 2},
+   {PIPE_FORMAT_R32G32B32_FLOAT,    from_32_float, to_32_float, LLVMFloatType(), 3},
+   {PIPE_FORMAT_R32G32B32A32_FLOAT, from_32_float, to_32_float, LLVMFloatType(), 4},
+
+   {PIPE_FORMAT_R32_UNORM,          from_32_unorm, to_32_unorm, LLVMIntType(32), 1},
+   {PIPE_FORMAT_R32G32_UNORM,       from_32_unorm, to_32_unorm, LLVMIntType(32), 2},
+   {PIPE_FORMAT_R32G32B32_UNORM,    from_32_unorm, to_32_unorm, LLVMIntType(32), 3},
+   {PIPE_FORMAT_R32G32B32A32_UNORM, from_32_unorm, to_32_unorm, LLVMIntType(32), 4},
+
+   {PIPE_FORMAT_R32_USCALED,          from_32_uscaled, to_32_uscaled, LLVMIntType(32), 1},
+   {PIPE_FORMAT_R32G32_USCALED,       from_32_uscaled, to_32_uscaled, LLVMIntType(32), 2},
+   {PIPE_FORMAT_R32G32B32_USCALED,    from_32_uscaled, to_32_uscaled, LLVMIntType(32), 3},
+   {PIPE_FORMAT_R32G32B32A32_USCALED, from_32_uscaled, to_32_uscaled, LLVMIntType(32), 4},
+
+   {PIPE_FORMAT_R32_SNORM,          from_32_snorm, to_32_snorm, LLVMIntType(32), 1},
+   {PIPE_FORMAT_R32G32_SNORM,       from_32_snorm, to_32_snorm, LLVMIntType(32), 2},
+   {PIPE_FORMAT_R32G32B32_SNORM,    from_32_snorm, to_32_snorm, LLVMIntType(32), 3},
+   {PIPE_FORMAT_R32G32B32A32_SNORM, from_32_snorm, to_32_snorm, LLVMIntType(32), 4},
+
+   {PIPE_FORMAT_R32_SSCALED,          from_32_sscaled, to_32_sscaled, LLVMIntType(32), 1},
+   {PIPE_FORMAT_R32G32_SSCALED,       from_32_sscaled, to_32_sscaled, LLVMIntType(32), 2},
+   {PIPE_FORMAT_R32G32B32_SSCALED,    from_32_sscaled, to_32_sscaled, LLVMIntType(32), 3},
+   {PIPE_FORMAT_R32G32B32A32_SSCALED, from_32_sscaled, to_32_sscaled, LLVMIntType(32), 4},
+
+   {PIPE_FORMAT_R16_UNORM,          from_16_unorm, to_16_unorm, LLVMIntType(16), 1},
+   {PIPE_FORMAT_R16G16_UNORM,       from_16_unorm, to_16_unorm, LLVMIntType(16), 2},
+   {PIPE_FORMAT_R16G16B16_UNORM,    from_16_unorm, to_16_unorm, LLVMIntType(16), 3},
+   {PIPE_FORMAT_R16G16B16A16_UNORM, from_16_unorm, to_16_unorm, LLVMIntType(16), 4},
+
+   {PIPE_FORMAT_R16_USCALED,          from_16_uscaled, to_16_uscaled, LLVMIntType(16), 1},
+   {PIPE_FORMAT_R16G16_USCALED,       from_16_uscaled, to_16_uscaled, LLVMIntType(16), 2},
+   {PIPE_FORMAT_R16G16B16_USCALED,    from_16_uscaled, to_16_uscaled, LLVMIntType(16), 3},
+   {PIPE_FORMAT_R16G16B16A16_USCALED, from_16_uscaled, to_16_uscaled, LLVMIntType(16), 4},
+
+   {PIPE_FORMAT_R16_SNORM,          from_16_snorm, to_16_snorm, LLVMIntType(16), 1},
+   {PIPE_FORMAT_R16G16_SNORM,       from_16_snorm, to_16_snorm, LLVMIntType(16), 2},
+   {PIPE_FORMAT_R16G16B16_SNORM,    from_16_snorm, to_16_snorm, LLVMIntType(16), 3},
+   {PIPE_FORMAT_R16G16B16A16_SNORM, from_16_snorm, to_16_snorm, LLVMIntType(16), 4},
+
+   {PIPE_FORMAT_R16_SSCALED,          from_16_sscaled, to_16_sscaled, LLVMIntType(16), 1},
+   {PIPE_FORMAT_R16G16_SSCALED,       from_16_sscaled, to_16_sscaled, LLVMIntType(16), 2},
+   {PIPE_FORMAT_R16G16B16_SSCALED,    from_16_sscaled, to_16_sscaled, LLVMIntType(16), 3},
+   {PIPE_FORMAT_R16G16B16A16_SSCALED, from_16_sscaled, to_16_sscaled, LLVMIntType(16), 4},
+
+   {PIPE_FORMAT_R8_UNORM,       from_8_unorm, to_8_unorm, LLVMIntType(8), 1},
+   {PIPE_FORMAT_R8G8_UNORM,     from_8_unorm, to_8_unorm, LLVMIntType(8), 2},
+   {PIPE_FORMAT_R8G8B8_UNORM,   from_8_unorm, to_8_unorm, LLVMIntType(8), 3},
+   {PIPE_FORMAT_R8G8B8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4},
+
+   {PIPE_FORMAT_R8_USCALED,       from_8_uscaled, to_8_uscaled, LLVMIntType(8), 1},
+   {PIPE_FORMAT_R8G8_USCALED,     from_8_uscaled, to_8_uscaled, LLVMIntType(8), 2},
+   {PIPE_FORMAT_R8G8B8_USCALED,   from_8_uscaled, to_8_uscaled, LLVMIntType(8), 3},
+   {PIPE_FORMAT_R8G8B8A8_USCALED, from_8_uscaled, to_8_uscaled, LLVMIntType(8), 4},
+
+   {PIPE_FORMAT_R8_SNORM,       from_8_snorm, to_8_snorm, LLVMIntType(8), 1},
+   {PIPE_FORMAT_R8G8_SNORM,     from_8_snorm, to_8_snorm, LLVMIntType(8), 2},
+   {PIPE_FORMAT_R8G8B8_SNORM,   from_8_snorm, to_8_snorm, LLVMIntType(8), 3},
+   {PIPE_FORMAT_R8G8B8A8_SNORM, from_8_snorm, to_8_snorm, LLVMIntType(8), 4},
+
+   {PIPE_FORMAT_R8_SSCALED,       from_8_sscaled, to_8_sscaled, LLVMIntType(8), 1},
+   {PIPE_FORMAT_R8G8_SSCALED,     from_8_sscaled, to_8_sscaled, LLVMIntType(8), 2},
+   {PIPE_FORMAT_R8G8B8_SSCALED,   from_8_sscaled, to_8_sscaled, LLVMIntType(8), 3},
+   {PIPE_FORMAT_R8G8B8A8_SSCALED, from_8_sscaled, to_8_sscaled, LLVMIntType(8), 4},
+
+   {PIPE_FORMAT_R32_FIXED,          from_32_fixed, to_32_fixed, LLVMIntType(32), 1},
+   {PIPE_FORMAT_R32G32_FIXED,       from_32_fixed, to_32_fixed, LLVMIntType(32), 2},
+   {PIPE_FORMAT_R32G32B32_FIXED,    from_32_fixed, to_32_fixed, LLVMIntType(32), 3},
+   {PIPE_FORMAT_R32G32B32A32_FIXED, from_32_fixed, to_32_fixed, LLVMIntType(32), 4},
+
+   {PIPE_FORMAT_A8R8G8B8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4},
+   {PIPE_FORMAT_B8G8R8A8_UNORM, from_8_unorm, to_8_unorm, LLVMIntType(8), 4},
+};
+
+/**
+ * Fetch and convert vertex attributes for 'count' indexed vertices.
+ *
+ * For every index in 'elts', each attribute is gathered into a float[4]
+ * scratch vector and then emitted at its output offset inside the current
+ * output vertex.  The output pointer advances by key.output_stride per
+ * vertex.
+ *
+ * \param translate      translate object, converted with translate_generic()
+ * \param elts           vertex indices, one per output vertex
+ * \param count          number of indices to process
+ * \param instance_id    current instance; selects the source element of
+ *                       attributes with a non-zero instance_divisor and is
+ *                       the value written for non-NORMAL elements
+ * \param output_buffer  destination for the converted vertices
+ */
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+                                         const unsigned *elts,
+                                         unsigned count,
+                                         unsigned instance_id,
+                                         void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+   unsigned i;
+
+   /* loop over vertices, then over vertex attributes (vertex shader inputs)
+    */
+   for (i = 0; i < count; i++) {
+      unsigned elt = elts[i];
+
+      for (attr = 0; attr < nr_attrs; attr++) {
+         float data[4];
+
+         char *dst = (vert +
+                      tg->attrib[attr].output_offset);
+
+         /* Same element-type handling as generic_run(): only NORMAL
+          * elements are read from a vertex buffer.
+          */
+         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+            const char *src;
+
+            if (tg->attrib[attr].instance_divisor) {
+               /* Per-instance data: indexed by instance, not by vertex. */
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride *
+                     (instance_id / tg->attrib[attr].instance_divisor);
+            } else {
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride * elt;
+            }
+
+            tg->attrib[attr].fetch( src, data );
+         } else {
+            /* Instance-id element: there is no buffer to read from.  Give
+             * every lane a defined value because emit() receives all four.
+             */
+            data[0] = (float)instance_id;
+            data[1] = 0.0f;
+            data[2] = 0.0f;
+            data[3] = 1.0f;
+         }
+
+         if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
+                             i, elt, attr, data[0], data[1], data[2], data[3]);
+
+         tg->attrib[attr].emit( data, dst );
+      }
+
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+/**
+ * Fetch and convert vertex attributes for 'count' consecutive vertices
+ * starting at index 'start'.
+ *
+ * Each attribute is gathered into a float[4] scratch vector and then emitted
+ * at its output offset inside the current output vertex.  The output pointer
+ * advances by key.output_stride per vertex.
+ *
+ * \param translate      translate object, converted with translate_generic()
+ * \param start          index of the first vertex to fetch
+ * \param count          number of vertices to process
+ * \param instance_id    current instance; selects the source element of
+ *                       attributes with a non-zero instance_divisor and is
+ *                       the value written for non-NORMAL elements
+ * \param output_buffer  destination for the converted vertices
+ */
+static void PIPE_CDECL generic_run( struct translate *translate,
+                                    unsigned start,
+                                    unsigned count,
+                                    unsigned instance_id,
+                                    void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+   unsigned i;
+
+   /* loop over vertices, then over vertex attributes (vertex shader inputs)
+    */
+   for (i = 0; i < count; i++) {
+      unsigned elt = start + i;
+
+      for (attr = 0; attr < nr_attrs; attr++) {
+         float data[4];
+
+         char *dst = (vert +
+                      tg->attrib[attr].output_offset);
+
+         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+            const char *src;
+
+            if (tg->attrib[attr].instance_divisor) {
+               /* Per-instance data: indexed by instance, not by vertex. */
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride *
+                     (instance_id / tg->attrib[attr].instance_divisor);
+            } else {
+               src = tg->attrib[attr].input_ptr +
+                     tg->attrib[attr].input_stride * elt;
+            }
+
+            tg->attrib[attr].fetch( src, data );
+         } else {
+            /* Instance-id element: there is no buffer to read from.  Give
+             * every lane a defined value because emit() receives all four;
+             * leaving lanes 1..3 unset would hand it indeterminate floats.
+             */
+            data[0] = (float)instance_id;
+            data[1] = 0.0f;
+            data[2] = 0.0f;
+            data[3] = 1.0f;
+         }
+
+         if (0) debug_printf("vert %d attr %d: %f %f %f %f\n",
+                             i, attr, data[0], data[1], data[2], data[3]);
+
+         tg->attrib[attr].emit( data, dst );
+      }
+
+      vert += tg->translate.key.output_stride;
+   }
+}
+
+
+
+/**
+ * Bind a vertex buffer.
+ *
+ * Every attribute whose source buffer index equals 'buf' gets its input
+ * pointer (buffer base plus the attribute's input_offset) and its input
+ * stride updated; other attributes are left alone.
+ */
+static void generic_set_buffer( struct translate *translate,
+                               unsigned buf,
+                               const void *ptr,
+                               unsigned stride )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   unsigned attr;
+
+   for (attr = 0; attr < tg->nr_attrib; attr++) {
+      /* Skip attributes that read from a different buffer. */
+      if (tg->attrib[attr].buffer != buf)
+         continue;
+
+      tg->attrib[attr].input_ptr = (char *)ptr + tg->attrib[attr].input_offset;
+      tg->attrib[attr].input_stride = stride;
+   }
+}
+
+
+/**
+ * Release a translate object by freeing its storage.
+ */
+static void generic_release( struct translate *translate )
+{
+   /* No reference counting (yet?): the object is freed unconditionally. */
+   FREE(translate);
+}
+
+/**
+ * Create a generic translate object for 'key'.
+ *
+ * Copies the key, installs the generic_* callbacks and, for each key
+ * element, records its type, source buffer/offset/divisor, output offset and
+ * the fetch/emit functions chosen for its input/output formats.
+ *
+ * \return the embedded struct translate, or NULL if allocation fails
+ */
+struct translate *translate_generic_create( const struct translate_key *key )
+{
+   struct translate_generic *tg;
+   unsigned n;
+
+   tg = CALLOC_STRUCT(translate_generic);
+   if (!tg)
+      return NULL;
+
+   /* Key and method table. */
+   tg->translate.key = *key;
+   tg->translate.run = generic_run;
+   tg->translate.run_elts = generic_run_elts;
+   tg->translate.set_buffer = generic_set_buffer;
+   tg->translate.release = generic_release;
+
+   tg->nr_attrib = key->nr_elements;
+
+   for (n = 0; n < key->nr_elements; n++) {
+      /* Input side. */
+      tg->attrib[n].type = key->element[n].type;
+      tg->attrib[n].buffer = key->element[n].input_buffer;
+      tg->attrib[n].input_offset = key->element[n].input_offset;
+      tg->attrib[n].instance_divisor = key->element[n].instance_divisor;
+      tg->attrib[n].fetch = get_fetch_func(key->element[n].input_format);
+
+      /* Output side. */
+      tg->attrib[n].output_offset = key->element[n].output_offset;
+      tg->attrib[n].emit = get_emit_func(key->element[n].output_format);
+   }
+
+   return &tg->translate;
+}
index 1e6e01af9e281cd0f20333f8c2543d71dd139fd8..7e24e5fd6fcdec6c0253dc19ca6d109fd51857c1 100644 (file)
 
 #include "tgsi/tgsi_scan.h"
 
+#ifdef DRAW_LLVM
+#include <llvm-c/ExecutionEngine.h>
+#endif
+
 
 struct pipe_context;
 struct draw_vertex_shader;
@@ -237,9 +241,16 @@ struct draw_context
 
    unsigned instance_id;
 
+#ifdef DRAW_LLVM
+   LLVMExecutionEngineRef engine;
+#endif
    void *driver_private;
 };
 
+/*******************************************************************************
+ * Draw common initialization code
+ */
+boolean draw_init(struct draw_context *draw);
 
 /*******************************************************************************
  * Vertex shader code:
index 341353f6289002030ecdda2a78e859f42423cb66..9b1e319551c45fd490b59cb14840464534099d65 100644 (file)
@@ -142,7 +142,9 @@ boolean draw_pt_init( struct draw_context *draw )
    if (!draw->pt.middle.fetch_shade_emit)
       return FALSE;
 
-   draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
+   draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit_llvm( draw );
+   if (!draw->pt.middle.general)
+      draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
    if (!draw->pt.middle.general)
       return FALSE;
 
index d5e0d92a605b983edee3a1210db3ab620896857c..c2797a759eecc87cfe67c6c9129196baa15d803f 100644 (file)
@@ -147,6 +147,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
 struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
 struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
 struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw);
 
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
new file mode 100644 (file)
index 0000000..ae59563
--- /dev/null
@@ -0,0 +1,432 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMWare, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
+#include "draw/draw_llvm.h"
+
+#include "translate/translate.h"
+
+
+/**
+ * State of the LLVM middle end.
+ */
+struct llvm_middle_end {
+   /* Must stay the first member: the callbacks in this file cast the
+    * draw_pt_middle_end pointer they receive straight to llvm_middle_end.
+    */
+   struct draw_pt_middle_end base;
+   /* Draw context this middle end was created for. */
+   struct draw_context *draw;
+
+   /* Helper stages; created in draw_pt_fetch_pipeline_or_emit_llvm() and
+    * destroyed in llvm_middle_end_destroy().
+    */
+   struct pt_emit *emit;
+   struct pt_fetch *fetch;
+   struct pt_post_vs *post_vs;
+
+
+   /* Not referenced by the code in this file. */
+   unsigned vertex_data_offset;
+   /* Bytes per pipeline vertex (vertex header plus attribute slots);
+    * computed in llvm_middle_end_prepare().
+    */
+   unsigned vertex_size;
+   /* Primitive type and PT_* option bits saved by llvm_middle_end_prepare()
+    * for the run callbacks.
+    */
+   unsigned prim;
+   unsigned opt;
+
+   /* LLVM state obtained from draw_llvm_create(); provides jit_func and
+    * jit_context used by llvm_middle_end_linear_run().
+    */
+   struct draw_llvm *llvm;
+};
+
+
+/**
+ * Set up the middle end for a batch of primitives.
+ *
+ * Stores prim/opt, sizes the per-vertex storage, prepares the fetch, shader,
+ * post-VS and (when the pipeline is not requested) emit stages, and finally
+ * prepares the LLVM state.
+ *
+ * \param middle        middle end; cast to llvm_middle_end
+ * \param prim          primitive type, saved for the run callbacks
+ * \param opt           PT_* option bits, saved for the run callbacks
+ * \param max_vertices  out: vertex limit for one run, rounded down to even
+ */
+static void
+llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
+                         unsigned prim,
+                         unsigned opt,
+                         unsigned *max_vertices )
+{
+   struct llvm_middle_end *me = (struct llvm_middle_end *)middle;
+   struct draw_context *draw = me->draw;
+   struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+   struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
+   unsigned slot;
+   unsigned instance_slot = ~0;
+
+   /* One slot more than num_outputs: the pipeline occasionally tags on an
+    * additional texcoord, eg for AA lines.
+    */
+   unsigned nr_slots = MAX2( vshader->info.num_inputs,
+                             vshader->info.num_outputs + 1 );
+
+   /* Find the first input carrying the instanceID system value, if any. */
+   for (slot = 0; slot < vshader->info.num_inputs; slot++) {
+      if (vshader->info.input_semantic_name[slot] != TGSI_SEMANTIC_INSTANCEID)
+         continue;
+
+      instance_slot = slot;
+      break;
+   }
+
+   me->prim = prim;
+   me->opt = opt;
+
+   /* The vertex header is always accounted for, needed or not; it is hard
+    * to drop, in particular because of the viewport code in
+    * draw_pt_post_vs.c.
+    */
+   me->vertex_size = sizeof(struct vertex_header) + nr_slots * 4 * sizeof(float);
+
+   draw_pt_fetch_prepare( me->fetch,
+                          vshader->info.num_inputs,
+                          me->vertex_size,
+                          instance_slot );
+
+   if (opt & PT_SHADE) {
+      vshader->prepare(vshader, draw);
+      draw_geometry_shader_prepare(gshader, draw);
+   }
+
+   /* XXX: what matters here is gl vs dx9 clip space rather than the gl
+    * rasterization rules as such.
+    */
+   draw_pt_post_vs_prepare( me->post_vs,
+                            (boolean)draw->bypass_clipping,
+                            (boolean)(draw->identity_viewport ||
+                            draw->rasterizer->bypass_vs_clip_and_viewport),
+                            (boolean)draw->rasterizer->gl_rasterization_rules,
+                            (draw->vs.edgeflag_output ? true : false) );
+
+   if (opt & PT_PIPELINE) {
+      *max_vertices = DRAW_PIPE_MAX_VERTICES;
+   }
+   else {
+      draw_pt_emit_prepare( me->emit, prim, max_vertices );
+
+      *max_vertices = MAX2( *max_vertices,
+                            DRAW_PIPE_MAX_VERTICES );
+   }
+
+   /* return even number */
+   *max_vertices &= ~1;
+
+   draw_llvm_prepare(me->llvm);
+}
+
+
+
+/**
+ * Process an indexed batch: fetch 'fetch_count' vertices through
+ * 'fetch_elts', optionally shade them, run post-VS, then hand the result
+ * either to the draw pipeline or to the emit stage together with
+ * draw_elts/draw_count.
+ *
+ * If the temporary vertex buffer cannot be allocated the batch is dropped.
+ */
+static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
+                                 const unsigned *fetch_elts,
+                                 unsigned fetch_count,
+                                 const ushort *draw_elts,
+                                 unsigned draw_count )
+{
+   struct llvm_middle_end *me = (struct llvm_middle_end *)middle;
+   struct draw_context *draw = me->draw;
+   struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+   unsigned flags = me->opt;
+   unsigned alloc_count = align( fetch_count, 4 );
+   struct vertex_header *verts;
+
+   verts = (struct vertex_header *)MALLOC(me->vertex_size * alloc_count);
+   if (!verts) {
+      /* Nothing useful can be done without storage - skip the rendering. */
+      assert(0);
+      return;
+   }
+
+   /* Gather the indexed vertices into the temporary buffer. */
+   draw_pt_fetch_run( me->fetch,
+                      fetch_elts,
+                      fetch_count,
+                      (char *)verts );
+
+   /* Shading happens in place and overwrites the data[] parts of the
+    * vertices.  Without a shader (eg bypass_vs_clip_and_viewport) the inputs
+    * already are the outputs and sit in the right place.
+    */
+   if (flags & PT_SHADE) {
+      vs->run_linear(vs,
+                     (const float (*)[4])verts->data,
+                     (      float (*)[4])verts->data,
+                     draw->pt.user.vs_constants,
+                     fetch_count,
+                     me->vertex_size,
+                     me->vertex_size);
+
+      if (gs) {
+         draw_geometry_shader_run(gs,
+                                  (const float (*)[4])verts->data,
+                                  (      float (*)[4])verts->data,
+                                  draw->pt.user.gs_constants,
+                                  fetch_count,
+                                  me->vertex_size,
+                                  me->vertex_size);
+      }
+   }
+
+   /* A non-zero post-VS result forces the pipeline path. */
+   if (draw_pt_post_vs_run( me->post_vs,
+                            verts,
+                            fetch_count,
+                            me->vertex_size )) {
+      flags |= PT_PIPELINE;
+   }
+
+   if (!(flags & PT_PIPELINE)) {
+      draw_pt_emit( me->emit,
+                    (const float (*)[4])verts->data,
+                    fetch_count,
+                    me->vertex_size,
+                    draw_elts,
+                    draw_count );
+   }
+   else {
+      draw_pipeline_run( me->draw,
+                         me->prim,
+                         verts,
+                         fetch_count,
+                         me->vertex_size,
+                         draw_elts,
+                         draw_count );
+   }
+
+   FREE(verts);
+}
+
+
+/**
+ * Process 'count' consecutive vertices starting at 'start'.
+ *
+ * The vertices are produced by the JIT-compiled function (llvm->jit_func)
+ * writing into a temporary buffer, then passed through post-VS and finally
+ * to either the draw pipeline or the emit stage.
+ *
+ * If the temporary vertex buffer cannot be allocated the batch is dropped.
+ */
+static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
+                                       unsigned start,
+                                       unsigned count)
+{
+   struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
+   unsigned opt = fpme->opt;
+   unsigned alloc_count = align( count, 4 );
+
+   struct vertex_header *pipeline_verts =
+      (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+   if (!pipeline_verts) {
+      /* Not much we can do here - just skip the rendering.
+       */
+      assert(0);
+      return;
+   }
+
+   /* The generated code fills pipeline_verts itself; no separate fetch or
+    * shader call is made on this path.
+    */
+   fpme->llvm->jit_func( &fpme->llvm->jit_context,
+                         pipeline_verts,
+                         start,
+                         count,
+                         fpme->vertex_size );
+
+   /* A non-zero post-VS result forces the pipeline path. */
+   if (draw_pt_post_vs_run( fpme->post_vs,
+                           pipeline_verts,
+                           count,
+                           fpme->vertex_size ))
+   {
+      opt |= PT_PIPELINE;
+   }
+
+   /* Do we need to run the pipeline?
+    */
+   if (opt & PT_PIPELINE) {
+      draw_pipeline_run_linear( fpme->draw,
+                                fpme->prim,
+                                pipeline_verts,
+                                count,
+                                fpme->vertex_size);
+   }
+   else {
+      draw_pt_emit_linear( fpme->emit,
+                           (const float (*)[4])pipeline_verts->data,
+                           fpme->vertex_size,
+                           count );
+   }
+
+   FREE(pipeline_verts);
+}
+
+
+
+/**
+ * Process a linear range of vertices that is drawn through an index list:
+ * fetch 'count' vertices starting at 'start', optionally shade them, run
+ * post-VS, then pass the result with draw_elts/draw_count to either the draw
+ * pipeline or the emit stage.
+ *
+ * \return FALSE if the temporary vertex buffer cannot be allocated,
+ *         TRUE otherwise
+ */
+static boolean
+llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
+                                 unsigned start,
+                                 unsigned count,
+                                 const ushort *draw_elts,
+                                 unsigned draw_count )
+{
+   struct llvm_middle_end *me = (struct llvm_middle_end *)middle;
+   struct draw_context *draw = me->draw;
+   struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+   unsigned flags = me->opt;
+   unsigned alloc_count = align( count, 4 );
+   struct vertex_header *verts;
+
+   verts = (struct vertex_header *)MALLOC(me->vertex_size * alloc_count);
+   if (!verts)
+      return FALSE;
+
+   /* Gather the vertex range into the temporary buffer. */
+   draw_pt_fetch_run_linear( me->fetch,
+                             start,
+                             count,
+                             (char *)verts );
+
+   /* Shading happens in place and overwrites the data[] parts of the
+    * vertices.  Without a shader (ie bypass_vs_clip_and_viewport) the inputs
+    * already are the outputs and sit in the right place.
+    */
+   if (flags & PT_SHADE) {
+      vs->run_linear(vs,
+                     (const float (*)[4])verts->data,
+                     (      float (*)[4])verts->data,
+                     draw->pt.user.vs_constants,
+                     count,
+                     me->vertex_size,
+                     me->vertex_size);
+
+      if (gs) {
+         draw_geometry_shader_run(gs,
+                                  (const float (*)[4])verts->data,
+                                  (      float (*)[4])verts->data,
+                                  draw->pt.user.gs_constants,
+                                  count,
+                                  me->vertex_size,
+                                  me->vertex_size);
+      }
+   }
+
+   /* A non-zero post-VS result forces the pipeline path. */
+   if (draw_pt_post_vs_run( me->post_vs,
+                            verts,
+                            count,
+                            me->vertex_size )) {
+      flags |= PT_PIPELINE;
+   }
+
+   if (!(flags & PT_PIPELINE)) {
+      draw_pt_emit( me->emit,
+                    (const float (*)[4])verts->data,
+                    count,
+                    me->vertex_size,
+                    draw_elts,
+                    draw_count );
+   }
+   else {
+      draw_pipeline_run( me->draw,
+                         me->prim,
+                         verts,
+                         count,
+                         me->vertex_size,
+                         draw_elts,
+                         draw_count );
+   }
+
+   FREE(verts);
+   return TRUE;
+}
+
+
+
+/**
+ * 'finish' callback of the middle end; this implementation has no work to do.
+ */
+static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
+{
+   /* intentionally empty */
+}
+
+/**
+ * Destroy the middle end.
+ *
+ * Each sub-object is destroyed only if it was created, so this also serves
+ * as the cleanup path for a partially constructed object.
+ */
+static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
+{
+   struct llvm_middle_end *me = (struct llvm_middle_end *)middle;
+
+   if (me->fetch) {
+      draw_pt_fetch_destroy( me->fetch );
+   }
+
+   if (me->emit) {
+      draw_pt_emit_destroy( me->emit );
+   }
+
+   if (me->post_vs) {
+      draw_pt_post_vs_destroy( me->post_vs );
+   }
+
+   if (me->llvm) {
+      draw_llvm_destroy( me->llvm );
+   }
+
+   FREE(middle);
+}
+
+
+/**
+ * Create the LLVM middle end for 'draw'.
+ *
+ * \return the new middle end, or NULL when the draw context has no
+ *         execution engine, or when any allocation or sub-object creation
+ *         fails (whatever was already created is destroyed again)
+ */
+struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+{
+   struct llvm_middle_end *me;
+
+   if (!draw->engine)
+      return NULL;
+
+   me = CALLOC_STRUCT( llvm_middle_end );
+   if (!me)
+      return NULL;
+
+   me->draw = draw;
+
+   /* Method table. */
+   me->base.prepare         = llvm_middle_end_prepare;
+   me->base.run             = llvm_middle_end_run;
+   me->base.run_linear      = llvm_middle_end_linear_run;
+   me->base.run_linear_elts = llvm_middle_end_linear_run_elts;
+   me->base.finish          = llvm_middle_end_finish;
+   me->base.destroy         = llvm_middle_end_destroy;
+
+   /* Sub-objects; the first failure abandons construction. */
+   me->fetch = draw_pt_fetch_create( draw );
+   if (me->fetch == NULL)
+      goto fail;
+
+   me->post_vs = draw_pt_post_vs_create( draw );
+   if (me->post_vs == NULL)
+      goto fail;
+
+   me->emit = draw_pt_emit_create( draw );
+   if (me->emit == NULL)
+      goto fail;
+
+   me->llvm = draw_llvm_create(draw);
+   if (me->llvm == NULL)
+      goto fail;
+
+   return &me->base;
+
+ fail:
+   /* destroy() copes with members that are still NULL. */
+   llvm_middle_end_destroy( &me->base );
+
+   return NULL;
+}
index 5f7a645f5d8496a8cc99868b64fdde4e93429560..0c483de40713d1158c57abd8fbb0bcf6e4c3f224 100644 (file)
@@ -40,7 +40,7 @@
 
 #include "tgsi/tgsi_parse.h"
 
-#ifdef MESA_LLVM
+#ifdef DRAW_LLVM
 
 struct draw_llvm_vertex_shader {
    struct draw_vertex_shader base;
@@ -64,12 +64,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
                   unsigned input_stride,
                   unsigned output_stride )
 {
-   struct draw_llvm_vertex_shader *shader =
-      (struct draw_llvm_vertex_shader *)base;
 }
 
-
-
 static void
 vs_llvm_delete( struct draw_vertex_shader *base )
 {
@@ -90,6 +86,7 @@ struct draw_vertex_shader *
 draw_create_vs_llvm(struct draw_context *draw,
                    const struct pipe_shader_state *templ)
 {
+#if 0
    struct draw_llvm_vertex_shader *vs;
 
    vs = CALLOC_STRUCT( draw_llvm_vertex_shader );
@@ -113,6 +110,8 @@ draw_create_vs_llvm(struct draw_context *draw,
    vs->machine = draw->vs.machine;
 
    return &vs->base;
+#endif
+   return NULL;
 }
 
 
index 9120226de0c108a8dbfa9f12589bc1cb4c2d20ab..3edc62d0c6985d7b6aab232adcbadedc164f9e67 100644 (file)
@@ -40,6 +40,7 @@
 #include "lp_context.h"
 #include "lp_flush.h"
 #include "lp_perf.h"
+#include "lp_screen.h"
 #include "lp_state.h"
 #include "lp_surface.h"
 #include "lp_query.h"
@@ -105,6 +106,7 @@ struct pipe_context *
 llvmpipe_create_context( struct pipe_screen *screen, void *priv )
 {
    struct llvmpipe_context *llvmpipe;
+   struct llvmpipe_screen *llvmscreen = llvmpipe_screen(screen);
 
    llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16);
    if (!llvmpipe)
@@ -174,8 +176,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
    /*
     * Create drawing context and plug our rendering stage into it.
     */
-   llvmpipe->draw = draw_create();
-   if (!llvmpipe->draw) 
+   llvmpipe->draw = draw_create_with_llvm(llvmscreen->engine);
+   if (!llvmpipe->draw)
       goto fail;
 
    /* FIXME: devise alternative to draw_texture_samplers */