gallium/llvm: implement geometry shaders in the llvm paths
author Zack Rusin <zackr@vmware.com>
Mon, 18 Feb 2013 12:00:19 +0000 (04:00 -0800)
committer Zack Rusin <zackr@vmware.com>
Wed, 27 Mar 2013 10:53:02 +0000 (03:53 -0700)
This commit implements code generation for geometry shaders in
the SOA paths. All the code is there, but bugs are likely present.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_context.c
src/gallium/auxiliary/draw/draw_context.h
src/gallium/auxiliary/draw/draw_gs.c
src/gallium/auxiliary/draw/draw_gs.h
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/draw/draw_llvm.h
src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
src/gallium/drivers/llvmpipe/lp_state_fs.c

index 6b70ac88be6fc2bfbc93baa11c54809b82c81e96..d64b82b026193be8ac085f316c67519a4e18ce2e 100644 (file)
@@ -46,7 +46,7 @@
 #include "gallivm/lp_bld_limits.h"
 #include "draw_llvm.h"
 
-static boolean
+boolean
 draw_get_option_use_llvm(void)
 {
    static boolean first = TRUE;
@@ -808,16 +808,15 @@ draw_set_mapped_texture(struct draw_context *draw,
                         uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
                         uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
 {
-   if (shader_stage == PIPE_SHADER_VERTEX) {
 #ifdef HAVE_LLVM
-      if (draw->llvm)
-         draw_llvm_set_mapped_texture(draw,
-                                      sview_idx,
-                                      width, height, depth, first_level,
-                                      last_level, base_ptr,
-                                      row_stride, img_stride, mip_offsets);
+   if (draw->llvm)
+      draw_llvm_set_mapped_texture(draw,
+                                   shader_stage,
+                                   sview_idx,
+                                   width, height, depth, first_level,
+                                   last_level, base_ptr,
+                                   row_stride, img_stride, mip_offsets);
 #endif
-   }
 }
 
 /**
index 18c8595f8cf0771cf92b4806b33508279960163a..369f6c80fa0422c4b348b56e6a776125d289fbbe 100644 (file)
@@ -282,4 +282,9 @@ draw_get_shader_param(unsigned shader, enum pipe_shader_cap param);
 int
 draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param);
 
+#ifdef HAVE_LLVM
+boolean
+draw_get_option_use_llvm(void);
+#endif
+
 #endif /* DRAW_CONTEXT_H */
index cde07563c3b1942263c8aa7e356b6b22f71f912b..c8ed95ae7d39b2eb1fd6f00f26948253c1c54eed 100644 (file)
@@ -29,6 +29,9 @@
 
 #include "draw_private.h"
 #include "draw_context.h"
+#ifdef HAVE_LLVM
+#include "draw_llvm.h"
+#endif
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_exec.h"
@@ -67,7 +70,7 @@ draw_gs_get_input_index(int semantic, int index,
 static INLINE boolean
 draw_gs_should_flush(struct draw_geometry_shader *shader)
 {
-   return (shader->fetched_prim_count == 4);
+   return (shader->fetched_prim_count == shader->vector_length);
 }
 
 /*#define DEBUG_OUTPUTS 1*/
@@ -182,7 +185,7 @@ static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
 }
 
 static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
-                            const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 
+                            const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
 {
    struct tgsi_exec_machine *machine = shader->machine;
@@ -205,10 +208,148 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
    /* run interpreter */
    tgsi_exec_machine_run(machine);
 
-   return 
+   return
       machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
 }
 
+#ifdef HAVE_LLVM
+
+static void
+llvm_fetch_gs_input(struct draw_geometry_shader *shader,
+                    unsigned *indices,
+                    unsigned num_vertices,
+                    unsigned prim_idx)
+{
+   unsigned slot, vs_slot, i;
+   unsigned input_vertex_stride = shader->input_vertex_stride;
+   const float (*input_ptr)[4];
+   float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
+
+   input_ptr = shader->input;
+
+   for (i = 0; i < num_vertices; ++i) {
+      const float (*input)[4];
+#if DEBUG_INPUTS
+      debug_printf("%d) vertex index = %d (prim idx = %d)\n",
+                   i, indices[i], prim_idx);
+#endif
+      input = (const float (*)[4])(
+         (const char *)input_ptr + (indices[i] * input_vertex_stride));
+      for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
+         if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
+            (*input_data)[i][slot][0][prim_idx] = (float)shader->in_prim_idx;
+            (*input_data)[i][slot][1][prim_idx] = (float)shader->in_prim_idx;
+            (*input_data)[i][slot][2][prim_idx] = (float)shader->in_prim_idx;
+            (*input_data)[i][slot][3][prim_idx] = (float)shader->in_prim_idx;
+         } else {
+            vs_slot = draw_gs_get_input_index(
+                        shader->info.input_semantic_name[slot],
+                        shader->info.input_semantic_index[slot],
+                        shader->input_info);
+#if DEBUG_INPUTS
+            debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+                         slot, vs_slot, idx);
+#endif
+#if 0
+            assert(!util_is_inf_or_nan(input[vs_slot][0]));
+            assert(!util_is_inf_or_nan(input[vs_slot][1]));
+            assert(!util_is_inf_or_nan(input[vs_slot][2]));
+            assert(!util_is_inf_or_nan(input[vs_slot][3]));
+#endif
+            (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0];
+            (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1];
+            (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2];
+            (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3];
+#if DEBUG_INPUTS
+            debug_printf("\t\t%f %f %f %f\n",
+                         (*input_data)[i][slot][0][prim_idx],
+                         (*input_data)[i][slot][1][prim_idx],
+                         (*input_data)[i][slot][2][prim_idx],
+                         (*input_data)[i][slot][3][prim_idx]);
+#endif
+            ++vs_slot;
+         }
+      }
+   }
+}
+
+static void
+llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
+                      unsigned num_primitives,
+                      float (**p_output)[4])
+{
+   int total_verts = 0;
+   int vertex_count = 0;
+   int total_prims = 0;
+   int max_prims_per_invocation = 0;
+   char *output_ptr = (char*)shader->gs_output;
+   int i, j, prim_idx;
+
+   for (i = 0; i < shader->vector_length; ++i) {
+      int prims = shader->llvm_emitted_primitives[i];
+      total_prims += prims;
+      max_prims_per_invocation = MAX2(max_prims_per_invocation, prims);
+   }
+   for (i = 0; i < shader->vector_length; ++i) {
+      total_verts += shader->llvm_emitted_vertices[i];
+   }
+
+
+   output_ptr += shader->emitted_vertices * shader->vertex_size;
+   for (i = 0; i < shader->vector_length - 1; ++i) {
+      int current_verts = shader->llvm_emitted_vertices[i];
+
+      if (current_verts != shader->max_output_vertices) {
+         memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
+                output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size,
+                shader->vertex_size * (total_verts - vertex_count - current_verts));
+      }
+      vertex_count += current_verts;
+   }
+
+   prim_idx = 0;
+   for (i = 0; i < shader->vector_length; ++i) {
+      int num_prims = shader->llvm_emitted_primitives[i];
+      for (j = 0; j < num_prims; ++j) {
+         int prim_length =
+            shader->llvm_prim_lengths[j][i];
+         shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
+            prim_length;
+         ++prim_idx;
+      }
+   }
+
+   shader->emitted_primitives += total_prims;
+   shader->emitted_vertices += total_verts;
+}
+
+static void
+llvm_gs_prepare(struct draw_geometry_shader *shader,
+                const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
+{
+}
+
+static unsigned
+llvm_gs_run(struct draw_geometry_shader *shader,
+            unsigned input_primitives)
+{
+   unsigned ret;
+   char *input = (char*)shader->gs_output;
+
+   input += (shader->emitted_vertices * shader->vertex_size);
+
+   ret = shader->current_variant->jit_func(
+      shader->jit_context, shader->gs_input->data,
+      (struct vertex_header*)input,
+      input_primitives,
+      shader->draw->instance_id);
+
+   return ret;
+}
+
+#endif
+
 static void gs_flush(struct draw_geometry_shader *shader)
 {
    unsigned out_prim_count;
@@ -219,13 +360,15 @@ static void gs_flush(struct draw_geometry_shader *shader)
                 input_primitives <= 4);
 
    out_prim_count = shader->run(shader, input_primitives);
+   shader->fetch_outputs(shader, out_prim_count,
+                         &shader->tmp_output);
+
 #if 0
    debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
                 shader->emitted_primitives, shader->emitted_vertices,
                 out_prim_count);
 #endif
-   shader->fetch_outputs(shader, out_prim_count,
-                         &shader->tmp_output);
+
    shader->fetched_prim_count = 0;
 }
 
@@ -331,8 +474,8 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
  * Execute geometry shader.
  */
 int draw_geometry_shader_run(struct draw_geometry_shader *shader,
-                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 
-                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], 
+                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
                              const struct draw_vertex_info *input_verts,
                              const struct draw_prim_info *input_prim,
                              const struct tgsi_shader_info *input_info,
@@ -344,14 +487,20 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
    unsigned num_outputs = shader->info.num_outputs;
    unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
    unsigned num_input_verts = input_prim->linear ?
-                              input_verts->count :
-                              input_prim->count;
+      input_verts->count :
+      input_prim->count;
    unsigned num_in_primitives =
-      MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
-           u_gs_prims_for_vertices(shader->input_primitive, num_input_verts));
+      align(
+         MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
+              u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)),
+         shader->vector_length);
    unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive,
                                                     shader->max_output_vertices)
-                            * num_in_primitives;
+      * num_in_primitives;
+
+   //Assume at least one primitive
+   max_out_prims = MAX2(max_out_prims, 1);
+
 
    output_verts->vertex_size = vertex_size;
    output_verts->stride = output_verts->vertex_size;
@@ -385,6 +534,34 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
    FREE(shader->primitive_lengths);
    shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
 
+
+#ifdef HAVE_LLVM
+   if (draw_get_option_use_llvm()) {
+      shader->gs_output = output_verts->verts;
+      if (max_out_prims > shader->max_out_prims) {
+         unsigned i;
+         if (shader->llvm_prim_lengths) {
+            for (i = 0; i < shader->max_out_prims; ++i) {
+               align_free(shader->llvm_prim_lengths[i]);
+            }
+            FREE(shader->llvm_prim_lengths);
+         }
+
+         shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
+         for (i = 0; i < max_out_prims; ++i) {
+            int vector_size = shader->vector_length * sizeof(unsigned);
+            shader->llvm_prim_lengths[i] =
+               align_malloc(vector_size, vector_size);
+         }
+
+         shader->max_out_prims = max_out_prims;
+      }
+      shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
+      shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
+      shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
+   }
+#endif
+
    shader->prepare(shader, constants, constants_size);
 
    if (input_prim->linear)
@@ -464,10 +641,27 @@ struct draw_geometry_shader *
 draw_create_geometry_shader(struct draw_context *draw,
                             const struct pipe_shader_state *state)
 {
+#ifdef HAVE_LLVM
+   struct llvm_geometry_shader *llvm_gs;
+#endif
    struct draw_geometry_shader *gs;
    unsigned i;
 
-   gs = CALLOC_STRUCT(draw_geometry_shader);
+#ifdef HAVE_LLVM
+   if (draw_get_option_use_llvm()) {
+      llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
+
+      if (llvm_gs == NULL)
+         return NULL;
+
+      gs = &llvm_gs->base;
+
+      make_empty_list(&llvm_gs->variants);
+   } else
+#endif
+   {
+      gs = CALLOC_STRUCT(draw_geometry_shader);
+   }
 
    if (!gs)
       return NULL;
@@ -486,6 +680,17 @@ draw_create_geometry_shader(struct draw_context *draw,
    gs->input_primitive = PIPE_PRIM_TRIANGLES;
    gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
    gs->max_output_vertices = 32;
+   gs->max_out_prims = 0;
+
+   if (draw_get_option_use_llvm()) {
+      /* TODO: change the input array to handle the following
+         vector length, instead of the currently hardcoded
+         TGSI_NUM_CHANNELS
+      gs->vector_length = lp_native_vector_width / 32;*/
+      gs->vector_length = TGSI_NUM_CHANNELS;
+   } else {
+      gs->vector_length = TGSI_NUM_CHANNELS;
+   }
 
    for (i = 0; i < gs->info.num_properties; ++i) {
       if (gs->info.properties[i].name ==
@@ -507,10 +712,36 @@ draw_create_geometry_shader(struct draw_context *draw,
 
    gs->machine = draw->gs.tgsi.machine;
 
-   gs->fetch_outputs = tgsi_fetch_gs_outputs;
-   gs->fetch_inputs = tgsi_fetch_gs_input;
-   gs->prepare = tgsi_gs_prepare;
-   gs->run = tgsi_gs_run;
+#ifdef HAVE_LLVM
+   if (draw_get_option_use_llvm()) {
+      int vector_size = gs->vector_length * sizeof(float);
+      gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
+      memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
+      gs->llvm_prim_lengths = 0;
+
+      gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
+      gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
+
+      gs->fetch_outputs = llvm_fetch_gs_outputs;
+      gs->fetch_inputs = llvm_fetch_gs_input;
+      gs->prepare = llvm_gs_prepare;
+      gs->run = llvm_gs_run;
+
+      gs->jit_context = &draw->llvm->gs_jit_context;
+
+
+      llvm_gs->variant_key_size =
+         draw_gs_llvm_variant_key_size(
+            MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
+                 gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
+   } else
+#endif
+   {
+      gs->fetch_outputs = tgsi_fetch_gs_outputs;
+      gs->fetch_inputs = tgsi_fetch_gs_input;
+      gs->prepare = tgsi_gs_prepare;
+      gs->run = tgsi_gs_run;
+   }
 
    return gs;
 }
@@ -535,7 +766,42 @@ void draw_bind_geometry_shader(struct draw_context *draw,
 void draw_delete_geometry_shader(struct draw_context *draw,
                                  struct draw_geometry_shader *dgs)
 {
+#ifdef HAVE_LLVM
+   if (draw_get_option_use_llvm()) {
+      struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
+      struct draw_gs_llvm_variant_list_item *li;
+
+      li = first_elem(&shader->variants);
+      while(!at_end(&shader->variants, li)) {
+         struct draw_gs_llvm_variant_list_item *next = next_elem(li);
+         draw_gs_llvm_destroy_variant(li->base);
+         li = next;
+      }
+
+      assert(shader->variants_cached == 0);
+
+      if (dgs->llvm_prim_lengths) {
+         unsigned i;
+         for (i = 0; i < dgs->max_out_prims; ++i) {
+            align_free(dgs->llvm_prim_lengths[i]);
+         }
+         FREE(dgs->llvm_prim_lengths);
+      }
+      align_free(dgs->llvm_emitted_primitives);
+      align_free(dgs->llvm_emitted_vertices);
+
+      align_free(dgs->gs_input);
+   }
+#endif
+
    FREE(dgs->primitive_lengths);
    FREE((void*) dgs->state.tokens);
    FREE(dgs);
 }
+
+
+void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
+                                 struct draw_gs_llvm_variant *variant)
+{
+   shader->current_variant = variant;
+}
index 7ab4f04a33da78c5803e30b43776722855585a43..e62b34a17948a878134cfb90dfb756cd65f47c24 100644 (file)
 #include "draw_context.h"
 #include "draw_private.h"
 
-
 #define MAX_TGSI_PRIMITIVES 4
 
 struct draw_context;
 
+#ifdef HAVE_LLVM
+struct draw_gs_jit_context;
+struct draw_gs_llvm_variant;
+
+/**
+ * Structure holding the inputs to the geometry shader. It uses SOA layout.
+ * The dimensions are as follows:
+ * - maximum number of vertices for a geometry shader input primitive
+ *   (6 for triangle_adjacency)
+ * - maximum number of attributes for each vertex
+ * - four channels per each attribute (x,y,z,w)
+ * - number of input primitives equal to the SOA vector length
+ */
+struct draw_gs_inputs {
+   float data[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS];
+};
+#endif
+
 /**
  * Private version of the compiled geometry shader
  */
@@ -66,6 +83,19 @@ struct draw_geometry_shader {
    unsigned fetched_prim_count;
    const float (*input)[4];
    const struct tgsi_shader_info *input_info;
+   unsigned vector_length;
+   unsigned max_out_prims;
+
+#ifdef HAVE_LLVM
+   struct draw_gs_inputs *gs_input;
+   struct draw_gs_jit_context *jit_context;
+   struct draw_gs_llvm_variant *current_variant;
+   struct vertex_header *gs_output;
+
+   int **llvm_prim_lengths;
+   int *llvm_emitted_primitives;
+   int *llvm_emitted_vertices;
+#endif
 
    void (*fetch_inputs)(struct draw_geometry_shader *shader,
                         unsigned *indices,
@@ -102,4 +132,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
 int draw_gs_max_output_vertices(struct draw_geometry_shader *shader,
                                 unsigned pipe_prim);
 
+void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
+                                 struct draw_gs_llvm_variant *variant);
+
 #endif
index 602839d0d44a0bf6253fb045b40d574723f02573..e46195b634ed920e55b0b7588ea8e57b22989d42 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "draw_context.h"
 #include "draw_vs.h"
+#include "draw_gs.h"
 
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_logic.h"
@@ -228,6 +229,85 @@ create_jit_context_type(struct gallivm_state *gallivm,
 }
 
 
+/**
+ * Create LLVM type for struct draw_gs_jit_context
+ */
+static LLVMTypeRef
+create_gs_jit_context_type(struct gallivm_state *gallivm,
+                           unsigned vector_length,
+                           LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
+                           const char *struct_name)
+{
+   LLVMTargetDataRef target = gallivm->target;
+   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
+   LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef elem_types[8];
+   LLVMTypeRef context_type;
+
+   elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
+                                 LP_MAX_TGSI_CONST_BUFFERS);
+   elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
+                                                 DRAW_TOTAL_CLIP_PLANES), 0);
+   elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
+   
+   elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
+   elem_types[4] = LLVMPointerType(LLVMVectorType(int_type,
+                                                  vector_length), 0);
+   elem_types[5] = LLVMPointerType(LLVMVectorType(int_type,
+                                                  vector_length), 0);
+
+   elem_types[6] = LLVMArrayType(texture_type,
+                                 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
+   elem_types[7] = LLVMArrayType(sampler_type,
+                                 PIPE_MAX_SAMPLERS); /* samplers */
+
+   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
+                                          Elements(elem_types), 0);
+#if HAVE_LLVM < 0x0300
+   LLVMAddTypeName(gallivm->module, struct_name, context_type);
+
+   LLVMInvalidateStructLayout(gallivm->target, context_type);
+#endif
+
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
+                          target, context_type, 0);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
+                          target, context_type, 1);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport,
+                          target, context_type, 2);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
+                          target, context_type, 3);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
+                          target, context_type, 4);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
+                          target, context_type, 5);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
+                          target, context_type,
+                          DRAW_GS_JIT_CTX_TEXTURES);
+   LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
+                          target, context_type,
+                          DRAW_GS_JIT_CTX_SAMPLERS);
+   LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
+                        target, context_type);
+
+   return context_type;
+}
+
+
+static LLVMTypeRef
+create_gs_jit_input_type(struct gallivm_state *gallivm)
+{
+   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
+   LLVMTypeRef input_array;
+
+   input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
+   input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
+   input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
+   input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
+
+   return input_array;
+}
+
 /**
  * Create LLVM type for struct pipe_vertex_buffer
  */
@@ -397,6 +477,9 @@ draw_llvm_create(struct draw_context *draw)
    llvm->nr_variants = 0;
    make_empty_list(&llvm->vs_variants_list);
 
+   llvm->nr_gs_variants = 0;
+   make_empty_list(&llvm->gs_variants_list);
+
    return llvm;
 }
 
@@ -498,7 +581,8 @@ generate_vs(struct draw_llvm_variant *variant,
                      inputs,
                      outputs,
                      sampler,
-                     &llvm->draw->vs.vertex_shader->info);
+                     &llvm->draw->vs.vertex_shader->info,
+                     NULL);
 
    {
       LLVMValueRef out;
@@ -695,6 +779,7 @@ static void
 store_aos_array(struct gallivm_state *gallivm,
                 struct lp_type soa_type,
                 LLVMValueRef io_ptr,
+                LLVMValueRef *indices,
                 LLVMValueRef* aos,
                 int attrib,
                 int num_outputs,
@@ -707,11 +792,15 @@ store_aos_array(struct gallivm_state *gallivm,
    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
    int vector_length = soa_type.length;
    int i;
-   
+
    debug_assert(TGSI_NUM_CHANNELS == 4);
 
    for (i = 0; i < vector_length; i++) {
-      inds[i] = lp_build_const_int32(gallivm, i);
+      if (indices) {
+         inds[i] = indices[i];
+      } else {
+         inds[i] = lp_build_const_int32(gallivm, i);
+      }
       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
    }
 
@@ -753,6 +842,7 @@ store_aos_array(struct gallivm_state *gallivm,
 static void
 convert_to_aos(struct gallivm_state *gallivm,
                LLVMValueRef io,
+               LLVMValueRef *indices,
                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                LLVMValueRef clipmask,
                int num_outputs,
@@ -803,7 +893,7 @@ convert_to_aos(struct gallivm_state *gallivm,
 
       store_aos_array(gallivm,
                       soa_type,
-                      io,
+                      io, indices,
                       aos,
                       attrib,
                       num_outputs,
@@ -821,14 +911,14 @@ convert_to_aos(struct gallivm_state *gallivm,
 static void
 store_clip(struct gallivm_state *gallivm,
            const struct lp_type vs_type,
-           LLVMValueRef io_ptr,           
+           LLVMValueRef io_ptr,
            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
            boolean pre_clip_pos, int idx)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef soa[4];
    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef indices[2]; 
+   LLVMValueRef indices[2];
    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
@@ -836,7 +926,7 @@ store_clip(struct gallivm_state *gallivm,
 
    indices[0] =
    indices[1] = lp_build_const_int32(gallivm, 0);
-   
+
    for (i = 0; i < vs_type.length; i++) {
       inds[i] = lp_build_const_int32(gallivm, i);
       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
@@ -893,13 +983,13 @@ generate_viewport(struct draw_llvm_variant *variant,
    struct lp_type f32_type = vs_type;
    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 .. wn*/
-   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/ 
+   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
    LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
 
    /* for 1/w convention*/
    out3 = LLVMBuildFDiv(builder, const1, out3, "");
    LLVMBuildStore(builder, out3, outputs[0][3]);
-  
+
    /* Viewport Mapping */
    for (i=0; i<3; i++) {
       LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 .. xn*/
@@ -908,7 +998,7 @@ generate_viewport(struct draw_llvm_variant *variant,
       LLVMValueRef scale_i;
       LLVMValueRef trans_i;
       LLVMValueRef index;
-      
+
       index = lp_build_const_int32(gallivm, i);
       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
 
@@ -930,14 +1020,14 @@ generate_viewport(struct draw_llvm_variant *variant,
       /* store transformed outputs */
       LLVMBuildStore(builder, out, outputs[0][i]);
    }
-   
+
 }
 
 
 /**
  * Returns clipmask as nxi32 bitmask for the n vertices
  */
-static LLVMValueRef 
+static LLVMValueRef
 generate_clipmask(struct draw_llvm *llvm,
                   struct gallivm_state *gallivm,
                   struct lp_type vs_type,
@@ -952,7 +1042,7 @@ generate_clipmask(struct draw_llvm *llvm,
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
-   LLVMValueRef test, temp; 
+   LLVMValueRef test, temp;
    LLVMValueRef zero, shift;
    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
@@ -967,7 +1057,7 @@ generate_clipmask(struct draw_llvm *llvm,
 
    cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
    cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
-  
+
    if (cd[0] != pos || cd[1] != pos)
       have_cd = true;
 
@@ -1002,27 +1092,27 @@ generate_clipmask(struct draw_llvm *llvm,
       /* plane 1 */
       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
       temp = shift;
-      test = LLVMBuildAnd(builder, test, temp, ""); 
+      test = LLVMBuildAnd(builder, test, temp, "");
       mask = test;
-   
+
       /* plane 2 */
       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
       temp = LLVMBuildShl(builder, temp, shift, "");
-      test = LLVMBuildAnd(builder, test, temp, ""); 
+      test = LLVMBuildAnd(builder, test, temp, "");
       mask = LLVMBuildOr(builder, mask, test, "");
-   
+
       /* plane 3 */
       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
       temp = LLVMBuildShl(builder, temp, shift, "");
-      test = LLVMBuildAnd(builder, test, temp, ""); 
+      test = LLVMBuildAnd(builder, test, temp, "");
       mask = LLVMBuildOr(builder, mask, test, "");
 
       /* plane 4 */
       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
       temp = LLVMBuildShl(builder, temp, shift, "");
-      test = LLVMBuildAnd(builder, test, temp, ""); 
+      test = LLVMBuildAnd(builder, test, temp, "");
       mask = LLVMBuildOr(builder, mask, test, "");
    }
 
@@ -1031,22 +1121,22 @@ generate_clipmask(struct draw_llvm *llvm,
       if (clip_halfz) {
          /* plane 5 */
          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
-         test = LLVMBuildAnd(builder, test, temp, ""); 
+         test = LLVMBuildAnd(builder, test, temp, "");
          mask = LLVMBuildOr(builder, mask, test, "");
-      }  
+      }
       else {
          /* plane 5 */
          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
-         test = LLVMBuildAnd(builder, test, temp, ""); 
+         test = LLVMBuildAnd(builder, test, temp, "");
          mask = LLVMBuildOr(builder, mask, test, "");
       }
       /* plane 6 */
       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
       temp = LLVMBuildShl(builder, temp, shift, "");
-      test = LLVMBuildAnd(builder, test, temp, ""); 
+      test = LLVMBuildAnd(builder, test, temp, "");
       mask = LLVMBuildOr(builder, mask, test, "");
-   }   
+   }
 
    if (clip_user) {
       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
@@ -1118,7 +1208,7 @@ generate_clipmask(struct draw_llvm *llvm,
 
 /**
  * Returns boolean if any clipping has occurred
- * Used zero/non-zero i32 value to represent boolean 
+ * Used zero/non-zero i32 value to represent boolean
  */
 static LLVMValueRef
 clipmask_booli32(struct gallivm_state *gallivm,
@@ -1144,6 +1234,94 @@ clipmask_booli32(struct gallivm_state *gallivm,
    return ret;
 }
 
+static void
+draw_gs_llvm_emit_vertex(struct lp_build_tgsi_context * bld_base,
+                         LLVMValueRef (*outputs)[4],
+                         LLVMValueRef emitted_vertices_vec,
+                         void *user_data)
+{
+   struct draw_gs_llvm_variant *variant =
+      (struct draw_gs_llvm_variant *)user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_type gs_type = bld_base->base.type;
+   LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
+                                                  lp_int_type(gs_type), 0);
+   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef max_output_vertices =
+      lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices);
+   LLVMValueRef io = variant->io_ptr;
+   unsigned i;
+   const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
+
+   for (i = 0; i < gs_type.length; ++i) {
+      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
+      LLVMValueRef currently_emitted =
+         LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
+      indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, "");
+      indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
+   }
+
+   convert_to_aos(gallivm, io, indices,
+                  outputs, clipmask,
+                  gs_info->num_outputs, gs_type,
+                  FALSE);
+}
+
+static void
+draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * bld_base,
+                           LLVMValueRef verts_per_prim_vec,
+                           LLVMValueRef emitted_prims_vec,
+                           void *user_data)
+{
+   struct draw_gs_llvm_variant *variant =
+      (struct draw_gs_llvm_variant *)user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef prim_lengths_ptr =
+      draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
+   unsigned i;
+
+   for (i = 0; i < bld_base->base.type.length; ++i) {
+      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
+      LLVMValueRef prims_emitted =
+         LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
+      LLVMValueRef store_ptr;
+      LLVMValueRef num_vertices =
+         LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
+
+      /* Per lane: prim_lengths[prims_emitted][lane] = num_vertices (row picked
+       * by the lane's emitted-primitive count, column by the lane index). */
+
+      store_ptr = LLVMBuildGEP(builder, prim_lengths_ptr, &prims_emitted, 1, "");
+      store_ptr = LLVMBuildLoad(builder, store_ptr, "");
+      store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
+      LLVMBuildStore(builder, num_vertices, store_ptr);
+   }
+}
+
+static void
+draw_gs_llvm_epilogue(struct lp_build_tgsi_context * bld_base,
+                      LLVMValueRef total_emitted_vertices_vec,
+                      LLVMValueRef emitted_prims_vec,
+                      void *user_data)
+{
+   struct draw_gs_llvm_variant *variant =
+      (struct draw_gs_llvm_variant *)user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef emitted_verts_ptr =
+      draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
+   LLVMValueRef emitted_prims_ptr =
+      draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
+   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+   
+   emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
+   emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
+   
+   LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
+   LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
+}
 
 static void
 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
@@ -1323,7 +1501,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                                      &true_index, 1, "");
             true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
          }
-         
+
          system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder,
                                                           system_values.vertex_id, true_index,
                                                           lp_build_const_int32(gallivm, i), "");
@@ -1387,11 +1565,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
          clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
       }
 
-      /* store clipmask in vertex header, 
-       * original positions in clip 
-       * and transformed positions in data 
-       */   
-      convert_to_aos(gallivm, io, outputs, clipmask,
+      /* store clipmask in vertex header,
+       * original positions in clip
+       * and transformed positions in data
+       */
+      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
                      vs_info->num_outputs, vs_type,
                      have_clipdist);
    }
@@ -1437,8 +1615,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
    key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
+   key->has_gs = llvm->draw->gs.geometry_shader != NULL;
    key->pad1 = 0;
-   key->pad2 = 0;
 
    /* All variants of this shader will have the same value for
     * nr_samplers.  Not yet trying to compact away holes in the
@@ -1487,6 +1665,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
    debug_printf("clip_halfz = %u\n", key->clip_halfz);
    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
+   debug_printf("has_gs = %u\n", key->has_gs);
    debug_printf("ucp_enable = %u\n", key->ucp_enable);
 
    for (i = 0 ; i < key->nr_vertex_elements; i++) {
@@ -1504,6 +1683,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
 
 void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
+                             unsigned shader_stage,
                              unsigned sview_idx,
                              uint32_t width, uint32_t height, uint32_t depth,
                              uint32_t first_level, uint32_t last_level,
@@ -1515,9 +1695,18 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
    unsigned j;
    struct draw_jit_texture *jit_tex;
 
-   assert(sview_idx < Elements(draw->llvm->jit_context.textures));
+   assert(shader_stage == PIPE_SHADER_VERTEX ||
+          shader_stage == PIPE_SHADER_GEOMETRY);
+
+   if (shader_stage == PIPE_SHADER_VERTEX) {
+      assert(sview_idx < Elements(draw->llvm->jit_context.textures));
 
-   jit_tex = &draw->llvm->jit_context.textures[sview_idx];
+      jit_tex = &draw->llvm->jit_context.textures[sview_idx];
+   } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
+      assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures));
+
+      jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
+   }
 
    jit_tex->width = width;
    jit_tex->height = height;
@@ -1551,6 +1740,19 @@ draw_llvm_set_sampler_state(struct draw_context *draw)
          COPY_4V(jit_sam->border_color, s->border_color.f);
       }
    }
+
+   for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
+      struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
+      /* Guard the pointer dereferenced below; samplers[i] alone is a per-stage row. */
+      if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {
+         const struct pipe_sampler_state *s
+            = draw->samplers[PIPE_SHADER_GEOMETRY][i];
+         jit_sam->min_lod = s->min_lod;
+         jit_sam->max_lod = s->max_lod;
+         jit_sam->lod_bias = s->lod_bias;
+         COPY_4V(jit_sam->border_color, s->border_color.f);
+      }
+   }
 }
 
 
@@ -1577,3 +1779,298 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
    llvm->nr_variants--;
    FREE(variant);
 }
+
+
+/**
+ * Create LLVM types for various structures.
+ */
+static void
+create_gs_jit_types(struct draw_gs_llvm_variant *var)
+{
+   struct gallivm_state *gallivm = var->gallivm;
+   LLVMTypeRef texture_type, sampler_type, context_type;
+
+   texture_type = create_jit_texture_type(gallivm, "texture");
+   sampler_type = create_jit_sampler_type(gallivm, "sampler");
+
+   context_type = create_gs_jit_context_type(gallivm,
+                                             var->shader->base.vector_length,
+                                             texture_type, sampler_type,
+                                             "draw_gs_jit_context");
+   var->context_ptr_type = LLVMPointerType(context_type, 0);
+
+   var->input_array_type = create_gs_jit_input_type(gallivm);
+}
+
+static LLVMTypeRef
+get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
+{
+   if (!variant->context_ptr_type)
+      create_gs_jit_types(variant);
+   return variant->context_ptr_type;
+}
+
+static LLVMValueRef
+generate_mask_value(struct draw_gs_llvm_variant *variant,
+                    struct lp_type gs_type)
+{
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef bits[16];
+   struct lp_type  mask_type = lp_int_type(gs_type);
+   struct lp_type mask_elem_type = lp_elem_type(mask_type);
+   LLVMValueRef mask_val = lp_build_const_vec(gallivm,
+                                              mask_type,
+                                              0);
+   unsigned i;
+
+   assert(gs_type.length <= Elements(bits));
+
+   for (i = gs_type.length; i >= 1; --i) {
+      int idx = i - 1;
+      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
+      bits[idx] = lp_build_compare(gallivm,
+                                   mask_elem_type, PIPE_FUNC_GEQUAL,
+                                   variant->num_prims, ind);
+   }
+   for (i = 0; i < gs_type.length; ++i) {
+      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
+      mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
+   }
+   mask_val = lp_build_compare(gallivm,
+                               mask_type, PIPE_FUNC_NOTEQUAL,
+                               mask_val,
+                               lp_build_const_int_vec(gallivm, mask_type, 0));
+
+   return mask_val;
+}
+
+static void
+draw_gs_llvm_generate(struct draw_llvm *llvm,
+                      struct draw_gs_llvm_variant *variant)
+{
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMContextRef context = gallivm->context;
+   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
+   LLVMTypeRef arg_types[5];
+   LLVMTypeRef func_type;
+   LLVMValueRef variant_func;
+   LLVMValueRef context_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef io_ptr, input_array, num_prims, mask_val;
+   struct lp_build_sampler_soa *sampler = 0;
+   struct lp_build_context bld;
+   struct lp_bld_tgsi_system_values system_values;
+   struct lp_type gs_type;
+   unsigned i;
+   struct lp_build_tgsi_gs_iface gs_iface;
+   const struct tgsi_token *tokens = variant->shader->base.state.tokens;
+   LLVMValueRef consts_ptr;
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   struct lp_build_mask_context mask;
+
+   memset(&system_values, 0, sizeof(system_values));
+
+   assert(variant->vertex_header_ptr_type);
+
+   arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
+   arg_types[1] = variant->input_array_type;           /* input */
+   arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
+   arg_types[3] = int32_type;                          /* num_prims */
+   arg_types[4] = int32_type;                          /* instance_id */
+
+   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
+
+   variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader",
+                                  func_type);
+   variant->function = variant_func;
+
+   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
+
+   for (i = 0; i < Elements(arg_types); ++i)
+      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+         LLVMAddAttribute(LLVMGetParam(variant_func, i),
+                          LLVMNoAliasAttribute);
+
+   context_ptr               = LLVMGetParam(variant_func, 0);
+   input_array               = LLVMGetParam(variant_func, 1);
+   io_ptr                    = LLVMGetParam(variant_func, 2);
+   num_prims                 = LLVMGetParam(variant_func, 3);
+   system_values.instance_id = LLVMGetParam(variant_func, 4);
+
+   lp_build_name(context_ptr, "context");
+   lp_build_name(input_array, "input");
+   lp_build_name(io_ptr, "io");
+   lp_build_name(num_prims, "num_prims");
+   lp_build_name(system_values.instance_id, "instance_id");
+
+   variant->context_ptr = context_ptr;
+   variant->io_ptr = io_ptr;
+   variant->num_prims = num_prims;
+
+   gs_iface.input = input_array;
+   gs_iface.emit_vertex = draw_gs_llvm_emit_vertex;
+   gs_iface.end_primitive = draw_gs_llvm_end_primitive;
+   gs_iface.gs_epilogue = draw_gs_llvm_epilogue;
+   gs_iface.user_data = variant; /* the callbacks cast this back to the variant */
+
+   /*
+    * Function body
+    */
+
+   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
+   builder = gallivm->builder;
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   lp_build_context_init(&bld, gallivm, lp_type_int(32));
+
+   memset(&gs_type, 0, sizeof gs_type);
+   gs_type.floating = TRUE; /* floating point values */
+   gs_type.sign = TRUE;     /* values are signed */
+   gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
+   gs_type.width = 32;      /* 32-bit float */
+   gs_type.length = variant->shader->base.vector_length;
+
+   consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
+
+   /* code generated texture sampling */
+   sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
+                                          context_ptr);
+
+   mask_val = generate_mask_value(variant, gs_type); /* enable only lanes below num_prims */
+   lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
+
+   lp_build_tgsi_soa(variant->gallivm,
+                     tokens,
+                     gs_type,
+                     &mask,
+                     consts_ptr,
+                     &system_values,
+                     NULL /*pos*/,
+                     NULL,
+                     outputs,
+                     sampler,
+                     &llvm->draw->gs.geometry_shader->info,
+                     &gs_iface);
+
+   lp_build_mask_end(&mask);
+
+   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
+
+   gallivm_verify_function(gallivm, variant_func);
+}
+
+
+struct draw_gs_llvm_variant *
+draw_gs_llvm_create_variant(struct draw_llvm *llvm,
+                            unsigned num_outputs,
+                            const struct draw_gs_llvm_variant_key *key)
+{
+   struct draw_gs_llvm_variant *variant;
+   struct llvm_geometry_shader *shader =
+      llvm_geometry_shader(llvm->draw->gs.geometry_shader);
+   LLVMTypeRef vertex_header;
+
+   variant = MALLOC(sizeof *variant +
+                    shader->variant_key_size -
+                    sizeof variant->key);
+   if (variant == NULL)
+      return NULL;
+
+   variant->llvm = llvm;
+   variant->shader = shader;
+
+   variant->gallivm = gallivm_create();
+
+   create_gs_jit_types(variant);
+
+   memcpy(&variant->key, key, shader->variant_key_size);
+
+   vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
+
+   variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
+
+   draw_gs_llvm_generate(llvm, variant);
+
+   gallivm_compile_module(variant->gallivm);
+
+   variant->jit_func = (draw_gs_jit_func)
+         gallivm_jit_function(variant->gallivm, variant->function);
+
+   variant->list_item_global.base = variant;
+   variant->list_item_local.base = variant;
+   /*variant->no = */shader->variants_created++;
+   variant->list_item_global.base = variant;
+
+   return variant;
+}
+
+void
+draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
+{
+   struct draw_llvm *llvm = variant->llvm;
+
+   if (variant->function) {
+      gallivm_free_function(variant->gallivm,
+                            variant->function, variant->jit_func);
+   }
+
+   gallivm_destroy(variant->gallivm);
+
+   remove_from_list(&variant->list_item_local);
+   variant->shader->variants_cached--;
+   remove_from_list(&variant->list_item_global);
+   llvm->nr_gs_variants--;
+   FREE(variant);
+}
+
+struct draw_gs_llvm_variant_key *
+draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
+{
+   unsigned i;
+   struct draw_gs_llvm_variant_key *key = (struct draw_gs_llvm_variant_key *)store;
+   struct draw_sampler_static_state *draw_sampler;
+
+   memset(key, 0, (char *)key->samplers - store); /* keys are memcmp'd: clear bits the bitfields leave unset */
+
+   /* All variants of this shader will have the same value for
+    * nr_samplers.  Not yet trying to compact away holes in the
+    * sampler array.
+    */
+   key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+   if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      key->nr_sampler_views =
+         llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+   }
+   else {
+      key->nr_sampler_views = key->nr_samplers;
+   }
+
+   draw_sampler = key->samplers;
+
+   memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
+
+   for (i = 0 ; i < key->nr_samplers; i++) {
+      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
+                                      llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
+   }
+   for (i = 0 ; i < key->nr_sampler_views; i++) {
+      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
+                                      llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
+   }
+
+   return key;
+}
+
+void
+draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
+{
+   unsigned i;
+   struct draw_sampler_static_state *sampler = key->samplers;
+
+   for (i = 0 ; i < key->nr_sampler_views; i++) {
+      debug_printf("sampler[%i].src_format = %s\n", i,
+                   util_format_name(sampler[i].texture_state.format));
+   }
+}
index c03c69e57b341ce4fce3647287f9d473f881c329..fc0d2bd4bf1ef96fbab676acfb43ce33d17e537b 100644 (file)
@@ -31,6 +31,8 @@
 #include "draw/draw_private.h"
 
 #include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
+
 #include "gallivm/lp_bld_sample.h"
 #include "gallivm/lp_bld_limits.h"
 
@@ -40,6 +42,7 @@
 
 struct draw_llvm;
 struct llvm_vertex_shader;
+struct llvm_geometry_shader;
 
 struct draw_jit_texture
 {
@@ -166,6 +169,61 @@ struct draw_jit_context
    lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset")
 
 
+/**
+ * This structure is passed directly to the generated geometry shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the draw_gs_jit_context_* macros.
+ * Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct draw_gs_jit_context
+{
+   const float *constants[LP_MAX_TGSI_CONST_BUFFERS];
+   float (*planes) [DRAW_TOTAL_CLIP_PLANES][4];
+   float *viewport;
+
+   int **prim_lengths;
+   int *emitted_vertices;
+   int *emitted_prims;
+
+   struct draw_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   struct draw_jit_sampler samplers[PIPE_MAX_SAMPLERS];
+};
+
+
+#define draw_gs_jit_context_constants(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, 0, "constants")
+
+#define draw_gs_jit_context_planes(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, 1, "planes")
+
+#define draw_gs_jit_context_viewport(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, 2, "viewport")
+
+#define draw_gs_jit_prim_lengths(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, 3, "prim_lengths")
+
+#define draw_gs_jit_emitted_vertices(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, 4, "emitted_vertices")
+
+#define draw_gs_jit_emitted_prims(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, 5, "emitted_prims")
+
+#define DRAW_GS_JIT_CTX_TEXTURES 6
+#define DRAW_GS_JIT_CTX_SAMPLERS 7
+
+#define draw_gs_jit_context_textures(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_TEXTURES, "textures")
+
+#define draw_gs_jit_context_samplers(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_SAMPLERS, "samplers")
+
+
+
 typedef int
 (*draw_jit_vert_func)(struct draw_jit_context *context,
                       struct vertex_header *io,
@@ -187,6 +245,14 @@ typedef int
                            struct pipe_vertex_buffer *vertex_buffers,
                            unsigned instance_id);
 
+
+typedef int
+(*draw_gs_jit_func)(struct draw_gs_jit_context *context,
+                    float inputs[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS],
+                    struct vertex_header *output,
+                    unsigned num_prims,
+                    unsigned instance_id);
+
 struct draw_llvm_variant_key
 {
    unsigned nr_vertex_elements:8;
@@ -199,13 +265,13 @@ struct draw_llvm_variant_key
    unsigned clip_halfz:1;
    unsigned bypass_viewport:1;
    unsigned need_edgeflags:1;
+   unsigned has_gs:1;
    /*
     * it is important there are no holes in this struct
     * (and all padding gets zeroed).
     */
-   unsigned pad1:1;
    unsigned ucp_enable:PIPE_MAX_CLIP_PLANES;
-   unsigned pad2:32-PIPE_MAX_CLIP_PLANES;
+   unsigned pad1:32-PIPE_MAX_CLIP_PLANES;
 
    /* Variable number of vertex elements:
     */
@@ -216,11 +282,23 @@ struct draw_llvm_variant_key
 /*   struct draw_sampler_static_state sampler; */
 };
 
+struct draw_gs_llvm_variant_key
+{
+   unsigned nr_samplers:8;
+   unsigned nr_sampler_views:8;
+
+   struct draw_sampler_static_state samplers[1];
+};
+
 #define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
    (sizeof(struct draw_llvm_variant_key) +     \
     PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state) + \
     (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
 
+#define DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE \
+   (sizeof(struct draw_gs_llvm_variant_key) +  \
+    PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state))
+
 
 static INLINE size_t
 draw_llvm_variant_key_size(unsigned nr_vertex_elements,
@@ -232,6 +310,14 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements,
 }
 
 
+static INLINE size_t
+draw_gs_llvm_variant_key_size(unsigned nr_samplers)
+{
+   return (sizeof(struct draw_gs_llvm_variant_key) +
+           (nr_samplers - 1) * sizeof(struct draw_sampler_static_state));
+}
+
+
 static INLINE struct draw_sampler_static_state *
 draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
 {
@@ -246,6 +332,13 @@ struct draw_llvm_variant_list_item
    struct draw_llvm_variant_list_item *next, *prev;
 };
 
+struct draw_gs_llvm_variant_list_item
+{
+   struct draw_gs_llvm_variant *base;
+   struct draw_gs_llvm_variant_list_item *next, *prev;
+};
+
+
 struct draw_llvm_variant
 {
    struct gallivm_state *gallivm;
@@ -271,6 +364,32 @@ struct draw_llvm_variant
    struct draw_llvm_variant_key key;
 };
 
+
+struct draw_gs_llvm_variant
+{
+   struct gallivm_state *gallivm;
+
+   /* LLVM JIT builder types */
+   LLVMTypeRef context_ptr_type;
+   LLVMTypeRef vertex_header_ptr_type;
+   LLVMTypeRef input_array_type;
+
+   LLVMValueRef context_ptr;
+   LLVMValueRef io_ptr;
+   LLVMValueRef num_prims;
+   LLVMValueRef function;
+   draw_gs_jit_func jit_func;
+
+   struct llvm_geometry_shader *shader;
+
+   struct draw_llvm *llvm;
+   struct draw_gs_llvm_variant_list_item list_item_global;
+   struct draw_gs_llvm_variant_list_item list_item_local;
+
+   /* key is variable-sized, must be last */
+   struct draw_gs_llvm_variant_key key;
+};
+
 struct llvm_vertex_shader {
    struct draw_vertex_shader base;
 
@@ -280,13 +399,27 @@ struct llvm_vertex_shader {
    unsigned variants_cached;
 };
 
+struct llvm_geometry_shader {
+   struct draw_geometry_shader base;
+
+   unsigned variant_key_size;
+   struct draw_gs_llvm_variant_list_item variants;
+   unsigned variants_created;
+   unsigned variants_cached;
+};
+
+
 struct draw_llvm {
    struct draw_context *draw;
 
    struct draw_jit_context jit_context;
+   struct draw_gs_jit_context gs_jit_context;
 
    struct draw_llvm_variant_list_item vs_variants_list;
    int nr_variants;
+
+   struct draw_gs_llvm_variant_list_item gs_variants_list;
+   int nr_gs_variants;
 };
 
 
@@ -296,6 +429,14 @@ llvm_vertex_shader(struct draw_vertex_shader *vs)
    return (struct llvm_vertex_shader *)vs;
 }
 
+static INLINE struct llvm_geometry_shader *
+llvm_geometry_shader(struct draw_geometry_shader *gs)
+{
+   return (struct llvm_geometry_shader *)gs;
+}
+
+
+
 
 struct draw_llvm *
 draw_llvm_create(struct draw_context *draw);
@@ -317,6 +458,21 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
 void
 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key);
 
+
+struct draw_gs_llvm_variant *
+draw_gs_llvm_create_variant(struct draw_llvm *llvm,
+                            unsigned num_vertex_header_attribs,
+                            const struct draw_gs_llvm_variant_key *key);
+
+void
+draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant);
+
+struct draw_gs_llvm_variant_key *
+draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
+
+void
+draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key);
+
 struct lp_build_sampler_soa *
 draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state,
                              LLVMValueRef context_ptr);
@@ -326,6 +482,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw);
 
 void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
+                             unsigned shader_stage,
                              unsigned sview_idx,
                              uint32_t width, uint32_t height, uint32_t depth,
                              uint32_t first_level, uint32_t last_level,
index fab168c6d473be0c432e085cf97ed2bc11bcc840..ec0f758d45341bd90e0d467685bf28439fe8af1b 100644 (file)
@@ -57,6 +57,71 @@ struct llvm_middle_end {
 };
 
 
+static void
+llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
+{
+   struct draw_context *draw = fpme->draw;
+   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
+   struct draw_gs_llvm_variant_key *key;
+   struct draw_gs_llvm_variant *variant = NULL;
+   struct draw_gs_llvm_variant_list_item *li;
+   struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
+   char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
+   unsigned i;
+
+   key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
+
+   /* Search shader's list of variants for the key */
+   li = first_elem(&shader->variants);
+   while (!at_end(&shader->variants, li)) {
+      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
+         variant = li->base;
+         break;
+      }
+      li = next_elem(li);
+   }
+
+   if (variant) {
+      /* found the variant, move to head of global list (for LRU) */
+      move_to_head(&fpme->llvm->gs_variants_list,
+                   &variant->list_item_global);
+   }
+   else {
+      /* Need to create new variant */
+
+      /* First check if we've created too many variants.  If so, free
+       * 25% of the LRU to avoid using too much memory.
+       */
+      if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+         /*
+          * XXX: should we flush here ?
+          */
+         for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+            struct draw_gs_llvm_variant_list_item *item;
+            if (is_empty_list(&fpme->llvm->gs_variants_list)) {
+               break;
+            }
+            item = last_elem(&fpme->llvm->gs_variants_list);
+            assert(item);
+            assert(item->base);
+            draw_gs_llvm_destroy_variant(item->base);
+         }
+      }
+
+      variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key);
+
+      if (variant) {
+         insert_at_head(&shader->variants, &variant->list_item_local);
+         insert_at_head(&fpme->llvm->gs_variants_list,
+                        &variant->list_item_global);
+         fpme->llvm->nr_gs_variants++;
+         shader->variants_cached++;
+      }
+   }
+
+   gs->current_variant = variant;
+}
+
 /**
  * Prepare/validate middle part of the vertex pipeline.
  * NOTE: if you change this function, also look at the non-LLVM
@@ -180,6 +245,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
 
       fpme->current_variant = variant;
    }
+
+   if (gs) {
+      llvm_middle_end_prepare_gs(fpme);
+   }
 }
 
 
@@ -199,11 +268,17 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
    for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) {
       fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
    }
+   for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) {
+      fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
+   }
 
    fpme->llvm->jit_context.planes =
       (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
+   fpme->llvm->gs_jit_context.planes =
+      (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
 
    fpme->llvm->jit_context.viewport = (float *) draw->viewport.scale;
+   fpme->llvm->gs_jit_context.viewport = (float *) draw->viewport.scale;
 }
 
 
index 6e65e126d689cda23ca312228e83484cac08a558..62d4707371fc5d16624f32e39933a4b1680b1714 100644 (file)
@@ -61,6 +61,7 @@ struct tgsi_shader_info;
 struct lp_build_mask_context;
 struct gallivm_state;
 struct lp_derivatives;
+struct lp_build_tgsi_gs_iface;
 
 
 enum lp_build_tex_modifier {
@@ -224,7 +225,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
                   struct lp_build_sampler_soa *sampler,
-                  const struct tgsi_shader_info *info);
+                  const struct tgsi_shader_info *info,
+                  const struct lp_build_tgsi_gs_iface *gs_iface);
 
 
 void
@@ -361,6 +363,24 @@ struct lp_build_tgsi_context
    void (*emit_epilogue)(struct lp_build_tgsi_context*);
 };
 
+struct lp_build_tgsi_gs_iface
+{
+   LLVMValueRef input;
+   void (*emit_vertex)(struct lp_build_tgsi_context * bld_base,
+                       LLVMValueRef (*outputs)[4],
+                       LLVMValueRef emitted_vertices_vec,
+                       void *user_data);
+   void (*end_primitive)(struct lp_build_tgsi_context * bld_base,
+                         LLVMValueRef verts_per_prim_vec,
+                         LLVMValueRef emitted_prims_vec,
+                         void *user_data);
+   void (*gs_epilogue)(struct lp_build_tgsi_context * bld_base,
+                       LLVMValueRef total_emitted_vertices_vec,
+                       LLVMValueRef emitted_prims_vec,
+                       void *user_data);
+   void *user_data;
+};
+
 struct lp_build_tgsi_soa_context
 {
    struct lp_build_tgsi_context bld_base;
@@ -368,6 +388,11 @@ struct lp_build_tgsi_soa_context
    /* Builder for scalar elements of shader's data type (float) */
    struct lp_build_context elem_bld;
 
+   const struct lp_build_tgsi_gs_iface *gs_iface;
+   LLVMValueRef emitted_prims_vec;
+   LLVMValueRef total_emitted_vertices_vec;
+   LLVMValueRef emitted_vertices_vec;
+
    LLVMValueRef consts_ptr;
    const LLVMValueRef *pos;
    const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
index cafc61f60f66e882f1da026945720faefca1a063..6f174a5b50eb20c1e8c07c3db2becbba1c3aa615 100644 (file)
@@ -437,6 +437,26 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
    }
 }
 
+/*
+ * Publish indirectly addressed outputs through the caller's output table.
+ *
+ * Does nothing unless TGSI_FILE_OUTPUT is flagged in bld->indirect_files.
+ * Otherwise every (index, channel) entry of bld->outputs, for each output
+ * counted by info->num_outputs, is overwritten with the value returned by
+ * lp_get_output_ptr(), so outputs are delivered via the same interface
+ * whether or not they live in our alloca array.
+ */
+static void
+gather_outputs(struct lp_build_tgsi_soa_context * bld)
+{
+   const struct tgsi_shader_info *info = bld->bld_base.info;
+   unsigned slot, comp;
+
+   if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
+      return;
+   }
+
+   assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
+
+   for (slot = 0; slot < info->num_outputs; ++slot) {
+      for (comp = 0; comp < TGSI_NUM_CHANNELS; ++comp) {
+         bld->outputs[slot][comp] = lp_get_output_ptr(bld, slot, comp);
+      }
+   }
+}
+
 /**
  * Gather vector.
  * XXX the lp_build_gather() function should be capable of doing this
@@ -757,6 +777,60 @@ emit_fetch_input(
    return res;
 }
 
+
+/*
+ * Fetch one channel of a geometry shader input register.
+ *
+ * The value is loaded from bld->gs_iface->input, addressed by three GEP
+ * indices: the vertex (the register's Dimension index), the attribute
+ * (the register's own index) and the channel given by 'swizzle'.  Either
+ * of the first two comes from get_indirect_index() when the register
+ * marks it as indirect, and is a constant otherwise.
+ *
+ * The loaded value is bitcast to the unsigned or signed integer vector
+ * type when 'stype' asks for it, and returned as loaded for any other
+ * type.
+ *
+ * NOTE(review): get_indirect_index() is defined outside this hunk;
+ * confirm that what it returns is usable directly as a GEP index.
+ */
+static LLVMValueRef
+emit_fetch_gs_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   enum tgsi_opcode_type stype,
+   unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef gep_idx[3];
+   LLVMValueRef elem_ptr;
+   LLVMValueRef value;
+
+   /* Innermost index: the channel selected by the swizzle. */
+   gep_idx[2] = lp_build_const_int32(gallivm, swizzle);
+
+   /* Middle index: the attribute, built before the vertex index. */
+   gep_idx[1] = reg->Register.Indirect
+      ? get_indirect_index(bld,
+                           reg->Register.File,
+                           reg->Register.Index,
+                           &reg->Indirect)
+      : lp_build_const_int32(gallivm, reg->Register.Index);
+
+   /* Outermost index: the vertex, taken from the Dimension part. */
+   gep_idx[0] = reg->Dimension.Indirect
+      ? get_indirect_index(bld,
+                           reg->Register.File,
+                           reg->Dimension.Index,
+                           &reg->DimIndirect)
+      : lp_build_const_int32(gallivm, reg->Dimension.Index);
+
+   elem_ptr = LLVMBuildGEP(builder, bld->gs_iface->input, gep_idx, 3, "");
+   value = LLVMBuildLoad(builder, elem_ptr, "");
+
+   assert(value);
+
+   switch (stype) {
+   case TGSI_TYPE_UNSIGNED:
+      value = LLVMBuildBitCast(builder, value,
+                               bld_base->uint_bld.vec_type, "");
+      break;
+   case TGSI_TYPE_SIGNED:
+      value = LLVMBuildBitCast(builder, value,
+                               bld_base->int_bld.vec_type, "");
+      break;
+   default:
+      /* every other type is returned exactly as loaded */
+      break;
+   }
+
+   return value;
+}
+
 static LLVMValueRef
 emit_fetch_temporary(
    struct lp_build_tgsi_context * bld_base,
@@ -2081,6 +2155,66 @@ sviewinfo_emit(
    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
 }
 
+/*
+ * Build the per-lane increment used by the GS vertex/primitive counters.
+ *
+ * Starts from int_bld.one, ANDs it with the execution mask when one is
+ * in effect (exec_mask.has_mask), then always ANDs it with the value of
+ * bld->mask.  Returns the resulting vector.
+ */
+static LLVMValueRef
+mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   LLVMValueRef ones = bld_base->int_bld.one;
+
+   if (bld->exec_mask.has_mask) {
+      ones = LLVMBuildAnd(builder, ones, bld->exec_mask.exec_mask, "");
+   }
+
+   return LLVMBuildAnd(builder, ones, lp_build_mask_value(bld->mask), "");
+}
+
+/*
+ * Action for TGSI_OPCODE_EMIT when a geometry shader interface is set.
+ *
+ * If the interface provides an emit_vertex hook: gathers the outputs,
+ * calls the hook with the output table and the running total of emitted
+ * vertices, then adds the masked per-lane increment to both the
+ * per-primitive and the total vertex counters.  Without a hook nothing
+ * is emitted and no counter changes.
+ *
+ * 'action' and 'emit_data' are not used.
+ */
+static void
+emit_vertex(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   const struct lp_build_tgsi_gs_iface *iface = bld->gs_iface;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   LLVMValueRef lane_inc;
+
+   if (!iface->emit_vertex) {
+      return;
+   }
+
+   /* The increment is built before the outputs are gathered. */
+   lane_inc = mask_to_one_vec(bld_base);
+   gather_outputs(bld);
+
+   /* The hook sees the total as it was before this vertex is counted. */
+   iface->emit_vertex(&bld->bld_base, bld->outputs,
+                      bld->total_emitted_vertices_vec,
+                      iface->user_data);
+
+   bld->emitted_vertices_vec =
+      LLVMBuildAdd(builder, bld->emitted_vertices_vec, lane_inc, "");
+   bld->total_emitted_vertices_vec =
+      LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, lane_inc, "");
+}
+
+
+/*
+ * Action for TGSI_OPCODE_ENDPRIM when a geometry shader interface is set.
+ *
+ * If the interface provides an end_primitive hook, it is called with the
+ * per-primitive vertex count and the primitive count as they stand
+ * before this primitive is closed.  Afterwards the primitive counter is
+ * advanced by the masked per-lane increment, and the per-primitive
+ * vertex counter is cleared in those same lanes only.
+ *
+ * Lanes excluded by the mask keep their pending vertex count.  Zeroing
+ * the whole vector would discard vertices of lanes that have not ended
+ * their primitive, while their primitive counter stays unchanged.
+ *
+ * 'action' and 'emit_data' are not used.
+ */
+static void
+end_primitive(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+
+   if (bld->gs_iface->end_primitive) {
+      LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
+      LLVMValueRef keep_lanes;
+
+      bld->gs_iface->end_primitive(&bld->bld_base,
+                                  bld->emitted_vertices_vec,
+                                  bld->emitted_prims_vec,
+                                  bld->gs_iface->user_data);
+      bld->emitted_prims_vec =
+         LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, "");
+
+      /*
+       * masked_ones is int_bld.one ANDed with the masks, so
+       * (one - masked_ones) is the complementary per-lane factor.
+       * Multiplying by it zeroes the count where the primitive was
+       * ended and leaves it untouched elsewhere.
+       */
+      keep_lanes = LLVMBuildSub(builder, bld_base->int_bld.one,
+                                masked_ones, "");
+      bld->emitted_vertices_vec =
+         LLVMBuildMul(builder, bld->emitted_vertices_vec, keep_lanes, "");
+   }
+}
+
 static void
 cal_emit(
    const struct lp_build_tgsi_action * action,
@@ -2324,7 +2458,7 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
 
    /* If we have indirect addressing in inputs we need to copy them into
     * our alloca array to be able to iterate over them */
-   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
       unsigned index, chan;
       LLVMTypeRef vec_type = bld_base->base.vec_type;
       LLVMValueRef array_size = lp_build_const_int32(gallivm,
@@ -2349,6 +2483,13 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
          }
       }
    }
+
+   if (bld->gs_iface) {
+      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+      bld->emitted_prims_vec = uint_bld->zero;
+      bld->emitted_vertices_vec = uint_bld->zero;
+      bld->total_emitted_vertices_vec = uint_bld->zero;
+   }
 }
 
 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
@@ -2361,16 +2502,14 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
    }
 
    /* If we have indirect addressing in outputs we need to copy our alloca array
-    * to the outputs slots specified by the called */
-   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
-      unsigned index, chan;
-      assert(bld_base->info->num_outputs <=
-                        bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
-      for (index = 0; index < bld_base->info->num_outputs; ++index) {
-         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
-         }
-      }
+    * to the outputs slots specified by the caller */
+   if (bld->gs_iface) {
+      bld->gs_iface->gs_epilogue(&bld->bld_base,
+                                bld->total_emitted_vertices_vec,
+                                bld->emitted_prims_vec,
+                                bld->gs_iface->user_data);
+   } else {
+      gather_outputs(bld);
    }
 }
 
@@ -2385,7 +2524,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                   struct lp_build_sampler_soa *sampler,
-                  const struct tgsi_shader_info *info)
+                  const struct tgsi_shader_info *info,
+                  const struct lp_build_tgsi_gs_iface *gs_iface)
 {
    struct lp_build_tgsi_soa_context bld;
 
@@ -2463,6 +2603,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
 
+   if (gs_iface) {
+      /* inputs are always indirect with gs */
+      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
+      bld.gs_iface = gs_iface;
+      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
+      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
+      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+   }
+
    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
 
    bld.system_values = *system_values;
index de51f39f159a9720f02058bf85340aad1f5f366b..ea41bd647204eecefaa6b7922431e254c90a8c94 100644 (file)
@@ -354,7 +354,7 @@ generate_fs(struct gallivm_state *gallivm,
    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
                      consts_ptr, &system_values,
                      interp->pos, interp->inputs,
-                     outputs, sampler, &shader->info.base);
+                     outputs, sampler, &shader->info.base, NULL);
 
    /* Alpha test */
    if (key->alpha.enabled) {
@@ -607,7 +607,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
                      consts_ptr, &system_values,
                      interp->pos, interp->inputs,
-                     outputs, sampler, &shader->info.base);
+                     outputs, sampler, &shader->info.base, NULL);
 
    /* Alpha test */
    if (key->alpha.enabled) {