From: Zack Rusin Date: Mon, 18 Feb 2013 12:00:19 +0000 (-0800) Subject: gallium/llvm: implement geometry shaders in the llvm paths X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e96f4e3b853ff5fe4d927c69695c0b5f1966d448;p=mesa.git gallium/llvm: implement geometry shaders in the llvm paths This commit implements code generation of the geometry shaders in the SOA paths. All the code is there but bugs are likely present. Signed-off-by: Zack Rusin Reviewed-by: Brian Paul Reviewed-by: José Fonseca --- diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 6b70ac88be6..d64b82b0261 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -46,7 +46,7 @@ #include "gallivm/lp_bld_limits.h" #include "draw_llvm.h" -static boolean +boolean draw_get_option_use_llvm(void) { static boolean first = TRUE; @@ -808,16 +808,15 @@ draw_set_mapped_texture(struct draw_context *draw, uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]) { - if (shader_stage == PIPE_SHADER_VERTEX) { #ifdef HAVE_LLVM - if (draw->llvm) - draw_llvm_set_mapped_texture(draw, - sview_idx, - width, height, depth, first_level, - last_level, base_ptr, - row_stride, img_stride, mip_offsets); + if (draw->llvm) + draw_llvm_set_mapped_texture(draw, + shader_stage, + sview_idx, + width, height, depth, first_level, + last_level, base_ptr, + row_stride, img_stride, mip_offsets); #endif - } } /** diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 18c8595f8cf..369f6c80fa0 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -282,4 +282,9 @@ draw_get_shader_param(unsigned shader, enum pipe_shader_cap param); int draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param); +#ifdef HAVE_LLVM +boolean +draw_get_option_use_llvm(void); +#endif + #endif /* 
DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index cde07563c3b..c8ed95ae7d3 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -29,6 +29,9 @@ #include "draw_private.h" #include "draw_context.h" +#ifdef HAVE_LLVM +#include "draw_llvm.h" +#endif #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_exec.h" @@ -67,7 +70,7 @@ draw_gs_get_input_index(int semantic, int index, static INLINE boolean draw_gs_should_flush(struct draw_geometry_shader *shader) { - return (shader->fetched_prim_count == 4); + return (shader->fetched_prim_count == shader->vector_length); } /*#define DEBUG_OUTPUTS 1*/ @@ -182,7 +185,7 @@ static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, } static void tgsi_gs_prepare(struct draw_geometry_shader *shader, - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) { struct tgsi_exec_machine *machine = shader->machine; @@ -205,10 +208,148 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, /* run interpreter */ tgsi_exec_machine_run(machine); - return + return machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; } +#ifdef HAVE_LLVM + +static void +llvm_fetch_gs_input(struct draw_geometry_shader *shader, + unsigned *indices, + unsigned num_vertices, + unsigned prim_idx) +{ + unsigned slot, vs_slot, i; + unsigned input_vertex_stride = shader->input_vertex_stride; + const float (*input_ptr)[4]; + float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; + + input_ptr = shader->input; + + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; +#if DEBUG_INPUTS + debug_printf("%d) vertex index = %d (prim idx = %d)\n", + i, indices[i], prim_idx); +#endif + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * 
input_vertex_stride)); + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { + if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { + (*input_data)[i][slot][0][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][1][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][2][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][3][prim_idx] = (float)shader->in_prim_idx; + } else { + vs_slot = draw_gs_get_input_index( + shader->info.input_semantic_name[slot], + shader->info.input_semantic_index[slot], + shader->input_info); +#if DEBUG_INPUTS + debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx); +#endif +#if 0 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; + (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; + (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; + (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; +#if DEBUG_INPUTS + debug_printf("\t\t%f %f %f %f\n", + (*input_data)[i][slot][0][prim_idx], + (*input_data)[i][slot][1][prim_idx], + (*input_data)[i][slot][2][prim_idx], + (*input_data)[i][slot][3][prim_idx]); +#endif + ++vs_slot; + } + } + } +} + +static void +llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, + unsigned num_primitives, + float (**p_output)[4]) +{ + int total_verts = 0; + int vertex_count = 0; + int total_prims = 0; + int max_prims_per_invocation = 0; + char *output_ptr = (char*)shader->gs_output; + int i, j, prim_idx; + + for (i = 0; i < shader->vector_length; ++i) { + int prims = shader->llvm_emitted_primitives[i]; + total_prims += prims; + max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); + } + for (i = 0; i < shader->vector_length; ++i) { + total_verts += shader->llvm_emitted_vertices[i]; 
+ } + + + output_ptr += shader->emitted_vertices * shader->vertex_size; + for (i = 0; i < shader->vector_length - 1; ++i) { + int current_verts = shader->llvm_emitted_vertices[i]; + + if (current_verts != shader->max_output_vertices) { + memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size, + output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size, + shader->vertex_size * (total_verts - vertex_count - current_verts)); + } + vertex_count += current_verts; + } + + prim_idx = 0; + for (i = 0; i < shader->vector_length; ++i) { + int num_prims = shader->llvm_emitted_primitives[i]; + for (j = 0; j < num_prims; ++j) { + int prim_length = + shader->llvm_prim_lengths[j][i]; + shader->primitive_lengths[shader->emitted_primitives + prim_idx] = + prim_length; + ++prim_idx; + } + } + + shader->emitted_primitives += total_prims; + shader->emitted_vertices += total_verts; +} + +static void +llvm_gs_prepare(struct draw_geometry_shader *shader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) +{ +} + +static unsigned +llvm_gs_run(struct draw_geometry_shader *shader, + unsigned input_primitives) +{ + unsigned ret; + char *input = (char*)shader->gs_output; + + input += (shader->emitted_vertices * shader->vertex_size); + + ret = shader->current_variant->jit_func( + shader->jit_context, shader->gs_input->data, + (struct vertex_header*)input, + input_primitives, + shader->draw->instance_id); + + return ret; +} + +#endif + static void gs_flush(struct draw_geometry_shader *shader) { unsigned out_prim_count; @@ -219,13 +360,15 @@ static void gs_flush(struct draw_geometry_shader *shader) input_primitives <= 4); out_prim_count = shader->run(shader, input_primitives); + shader->fetch_outputs(shader, out_prim_count, + &shader->tmp_output); + #if 0 debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", shader->emitted_primitives, shader->emitted_vertices, 
out_prim_count); #endif - shader->fetch_outputs(shader, out_prim_count, - &shader->tmp_output); + shader->fetched_prim_count = 0; } @@ -331,8 +474,8 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, * Execute geometry shader. */ int draw_geometry_shader_run(struct draw_geometry_shader *shader, - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, const struct tgsi_shader_info *input_info, @@ -344,14 +487,20 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned num_outputs = shader->info.num_outputs; unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); unsigned num_input_verts = input_prim->linear ? - input_verts->count : - input_prim->count; + input_verts->count : + input_prim->count; unsigned num_in_primitives = - MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), - u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)); + align( + MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), + u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)), + shader->vector_length); unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive, shader->max_output_vertices) - * num_in_primitives; + * num_in_primitives; + + //Assume at least one primitive + max_out_prims = MAX2(max_out_prims, 1); + output_verts->vertex_size = vertex_size; output_verts->stride = output_verts->vertex_size; @@ -385,6 +534,34 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, FREE(shader->primitive_lengths); shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); + +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + shader->gs_output = output_verts->verts; + if (max_out_prims > 
shader->max_out_prims) { + unsigned i; + if (shader->llvm_prim_lengths) { + for (i = 0; i < shader->max_out_prims; ++i) { + align_free(shader->llvm_prim_lengths[i]); + } + FREE(shader->llvm_prim_lengths); + } + + shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*)); + for (i = 0; i < max_out_prims; ++i) { + int vector_size = shader->vector_length * sizeof(unsigned); + shader->llvm_prim_lengths[i] = + align_malloc(vector_size, vector_size); + } + + shader->max_out_prims = max_out_prims; + } + shader->jit_context->prim_lengths = shader->llvm_prim_lengths; + shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; + shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; + } +#endif + shader->prepare(shader, constants, constants_size); if (input_prim->linear) @@ -464,10 +641,27 @@ struct draw_geometry_shader * draw_create_geometry_shader(struct draw_context *draw, const struct pipe_shader_state *state) { +#ifdef HAVE_LLVM + struct llvm_geometry_shader *llvm_gs; +#endif struct draw_geometry_shader *gs; unsigned i; - gs = CALLOC_STRUCT(draw_geometry_shader); +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); + + if (llvm_gs == NULL) + return NULL; + + gs = &llvm_gs->base; + + make_empty_list(&llvm_gs->variants); + } else +#endif + { + gs = CALLOC_STRUCT(draw_geometry_shader); + } if (!gs) return NULL; @@ -486,6 +680,17 @@ draw_create_geometry_shader(struct draw_context *draw, gs->input_primitive = PIPE_PRIM_TRIANGLES; gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; gs->max_output_vertices = 32; + gs->max_out_prims = 0; + + if (draw_get_option_use_llvm()) { + /* TODO: change the input array to handle the following + vector length, instead of the currently hardcoded + TGSI_NUM_CHANNELS + gs->vector_length = lp_native_vector_width / 32;*/ + gs->vector_length = TGSI_NUM_CHANNELS; + } else { + gs->vector_length = TGSI_NUM_CHANNELS; + } for (i = 0; i < gs->info.num_properties; 
++i) { if (gs->info.properties[i].name == @@ -507,10 +712,36 @@ draw_create_geometry_shader(struct draw_context *draw, gs->machine = draw->gs.tgsi.machine; - gs->fetch_outputs = tgsi_fetch_gs_outputs; - gs->fetch_inputs = tgsi_fetch_gs_input; - gs->prepare = tgsi_gs_prepare; - gs->run = tgsi_gs_run; +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + int vector_size = gs->vector_length * sizeof(float); + gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); + memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); + gs->llvm_prim_lengths = 0; + + gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size); + gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size); + + gs->fetch_outputs = llvm_fetch_gs_outputs; + gs->fetch_inputs = llvm_fetch_gs_input; + gs->prepare = llvm_gs_prepare; + gs->run = llvm_gs_run; + + gs->jit_context = &draw->llvm->gs_jit_context; + + + llvm_gs->variant_key_size = + draw_gs_llvm_variant_key_size( + MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1, + gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1)); + } else +#endif + { + gs->fetch_outputs = tgsi_fetch_gs_outputs; + gs->fetch_inputs = tgsi_fetch_gs_input; + gs->prepare = tgsi_gs_prepare; + gs->run = tgsi_gs_run; + } return gs; } @@ -535,7 +766,42 @@ void draw_bind_geometry_shader(struct draw_context *draw, void draw_delete_geometry_shader(struct draw_context *draw, struct draw_geometry_shader *dgs) { +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); + struct draw_gs_llvm_variant_list_item *li; + + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct draw_gs_llvm_variant_list_item *next = next_elem(li); + draw_gs_llvm_destroy_variant(li->base); + li = next; + } + + assert(shader->variants_cached == 0); + + if (dgs->llvm_prim_lengths) { + unsigned i; + for (i = 0; i < dgs->max_out_prims; ++i) { + align_free(dgs->llvm_prim_lengths[i]); + } + 
FREE(dgs->llvm_prim_lengths); + } + align_free(dgs->llvm_emitted_primitives); + align_free(dgs->llvm_emitted_vertices); + + align_free(dgs->gs_input); + } +#endif + FREE(dgs->primitive_lengths); FREE((void*) dgs->state.tokens); FREE(dgs); } + + +void draw_gs_set_current_variant(struct draw_geometry_shader *shader, + struct draw_gs_llvm_variant *variant) +{ + shader->current_variant = variant; +} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 7ab4f04a33d..e62b34a1794 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -31,11 +31,28 @@ #include "draw_context.h" #include "draw_private.h" - #define MAX_TGSI_PRIMITIVES 4 struct draw_context; +#ifdef HAVE_LLVM +struct draw_gs_jit_context; +struct draw_gs_llvm_variant; + +/** + * Structure holding the inputs to the geometry shader. It uses SOA layout. + * The dimensions are as follows: + * - maximum number of vertices for a geometry shader input primitive + * (6 for triangle_adjacency) + * - maximum number of attributes for each vertex + * - four channels per each attribute (x,y,z,w) + * - number of input primitives equal to the SOA vector length + */ +struct draw_gs_inputs { + float data[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS]; +}; +#endif + /** * Private version of the compiled geometry shader */ @@ -66,6 +83,19 @@ struct draw_geometry_shader { unsigned fetched_prim_count; const float (*input)[4]; const struct tgsi_shader_info *input_info; + unsigned vector_length; + unsigned max_out_prims; + +#ifdef HAVE_LLVM + struct draw_gs_inputs *gs_input; + struct draw_gs_jit_context *jit_context; + struct draw_gs_llvm_variant *current_variant; + struct vertex_header *gs_output; + + int **llvm_prim_lengths; + int *llvm_emitted_primitives; + int *llvm_emitted_vertices; +#endif void (*fetch_inputs)(struct draw_geometry_shader *shader, unsigned *indices, @@ -102,4 +132,7 @@ void draw_geometry_shader_prepare(struct 
draw_geometry_shader *shader, int draw_gs_max_output_vertices(struct draw_geometry_shader *shader, unsigned pipe_prim); +void draw_gs_set_current_variant(struct draw_geometry_shader *shader, + struct draw_gs_llvm_variant *variant); + #endif diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 602839d0d44..e46195b634e 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -29,6 +29,7 @@ #include "draw_context.h" #include "draw_vs.h" +#include "draw_gs.h" #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_logic.h" @@ -228,6 +229,85 @@ create_jit_context_type(struct gallivm_state *gallivm, } +/** + * Create LLVM type for struct draw_gs_jit_context + */ +static LLVMTypeRef +create_gs_jit_context_type(struct gallivm_state *gallivm, + unsigned vector_length, + LLVMTypeRef texture_type, LLVMTypeRef sampler_type, + const char *struct_name) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef elem_types[8]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */ + LP_MAX_TGSI_CONST_BUFFERS); + elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), + DRAW_TOTAL_CLIP_PLANES), 0); + elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */ + + elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); + elem_types[4] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); + elem_types[5] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); + + elem_types[6] = LLVMArrayType(texture_type, + PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ + elem_types[7] = LLVMArrayType(sampler_type, + PIPE_MAX_SAMPLERS); /* samplers */ + + context_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); +#if HAVE_LLVM < 
0x0300 + LLVMAddTypeName(gallivm->module, struct_name, context_type); + + LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif + + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants, + target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes, + target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport, + target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths, + target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices, + target, context_type, 4); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims, + target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures, + target, context_type, + DRAW_GS_JIT_CTX_TEXTURES); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers, + target, context_type, + DRAW_GS_JIT_CTX_SAMPLERS); + LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context, + target, context_type); + + return context_type; +} + + +static LLVMTypeRef +create_gs_jit_input_type(struct gallivm_state *gallivm) +{ + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef input_array; + + input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */ + input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */ + input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */ + input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */ + + return input_array; +} + /** * Create LLVM type for struct pipe_vertex_buffer */ @@ -397,6 +477,9 @@ draw_llvm_create(struct draw_context *draw) llvm->nr_variants = 0; make_empty_list(&llvm->vs_variants_list); + llvm->nr_gs_variants = 0; + make_empty_list(&llvm->gs_variants_list); + return llvm; } @@ -498,7 +581,8 @@ generate_vs(struct draw_llvm_variant *variant, inputs, outputs, sampler, - &llvm->draw->vs.vertex_shader->info); + 
&llvm->draw->vs.vertex_shader->info, + NULL); { LLVMValueRef out; @@ -695,6 +779,7 @@ static void store_aos_array(struct gallivm_state *gallivm, struct lp_type soa_type, LLVMValueRef io_ptr, + LLVMValueRef *indices, LLVMValueRef* aos, int attrib, int num_outputs, @@ -707,11 +792,15 @@ store_aos_array(struct gallivm_state *gallivm, LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; int vector_length = soa_type.length; int i; - + debug_assert(TGSI_NUM_CHANNELS == 4); for (i = 0; i < vector_length; i++) { - inds[i] = lp_build_const_int32(gallivm, i); + if (indices) { + inds[i] = indices[i]; + } else { + inds[i] = lp_build_const_int32(gallivm, i); + } io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); } @@ -753,6 +842,7 @@ store_aos_array(struct gallivm_state *gallivm, static void convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef io, + LLVMValueRef *indices, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef clipmask, int num_outputs, @@ -803,7 +893,7 @@ convert_to_aos(struct gallivm_state *gallivm, store_aos_array(gallivm, soa_type, - io, + io, indices, aos, attrib, num_outputs, @@ -821,14 +911,14 @@ convert_to_aos(struct gallivm_state *gallivm, static void store_clip(struct gallivm_state *gallivm, const struct lp_type vs_type, - LLVMValueRef io_ptr, + LLVMValueRef io_ptr, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], boolean pre_clip_pos, int idx) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef soa[4]; LLVMValueRef aos[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef indices[2]; + LLVMValueRef indices[2]; LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32]; LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32]; @@ -836,7 +926,7 @@ store_clip(struct gallivm_state *gallivm, indices[0] = indices[1] = lp_build_const_int32(gallivm, 0); - + for (i = 0; i < vs_type.length; i++) { inds[i] = lp_build_const_int32(gallivm, i); io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); @@ -893,13 +983,13 @@ 
generate_viewport(struct draw_llvm_variant *variant, struct lp_type f32_type = vs_type; LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type); LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 .. wn*/ - LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr); /* for 1/w convention*/ out3 = LLVMBuildFDiv(builder, const1, out3, ""); LLVMBuildStore(builder, out3, outputs[0][3]); - + /* Viewport Mapping */ for (i=0; i<3; i++) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 .. xn*/ @@ -908,7 +998,7 @@ generate_viewport(struct draw_llvm_variant *variant, LLVMValueRef scale_i; LLVMValueRef trans_i; LLVMValueRef index; - + index = lp_build_const_int32(gallivm, i); scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); @@ -930,14 +1020,14 @@ generate_viewport(struct draw_llvm_variant *variant, /* store transformed outputs */ LLVMBuildStore(builder, out, outputs[0][i]); } - + } /** * Returns clipmask as nxi32 bitmask for the n vertices */ -static LLVMValueRef +static LLVMValueRef generate_clipmask(struct draw_llvm *llvm, struct gallivm_state *gallivm, struct lp_type vs_type, @@ -952,7 +1042,7 @@ generate_clipmask(struct draw_llvm *llvm, { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask; /* stores the clipmasks */ - LLVMValueRef test, temp; + LLVMValueRef test, temp; LLVMValueRef zero, shift; LLVMValueRef pos_x, pos_y, pos_z, pos_w; LLVMValueRef cv_x, cv_y, cv_z, cv_w; @@ -967,7 +1057,7 @@ generate_clipmask(struct draw_llvm *llvm, cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0); cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1); - + if (cd[0] != pos || cd[1] != pos) have_cd = true; @@ -1002,27 +1092,27 @@ generate_clipmask(struct draw_llvm *llvm, /* plane 1 */ test = lp_build_compare(gallivm, 
f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); temp = shift; - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = test; - + /* plane 2 */ test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); - + /* plane 3 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); /* plane 4 */ test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } @@ -1031,22 +1121,22 @@ generate_clipmask(struct draw_llvm *llvm, if (clip_halfz) { /* plane 5 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); - } + } else { /* plane 5 */ test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } /* plane 6 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); 
- } + } if (clip_user) { LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); @@ -1118,7 +1208,7 @@ generate_clipmask(struct draw_llvm *llvm, /** * Returns boolean if any clipping has occurred - * Used zero/non-zero i32 value to represent boolean + * Used zero/non-zero i32 value to represent boolean */ static LLVMValueRef clipmask_booli32(struct gallivm_state *gallivm, @@ -1144,6 +1234,94 @@ clipmask_booli32(struct gallivm_state *gallivm, return ret; } +static void +draw_gs_llvm_emit_vertex(struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type gs_type = bld_base->base.type; + LLVMValueRef clipmask = lp_build_const_int_vec(gallivm, + lp_int_type(gs_type), 0); + LLVMValueRef indices[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef max_output_vertices = + lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices); + LLVMValueRef io = variant->io_ptr; + unsigned i; + const struct tgsi_shader_info *gs_info = &variant->shader->base.info; + + for (i = 0; i < gs_type.length; ++i) { + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + LLVMValueRef currently_emitted = + LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, ""); + indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, ""); + indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, ""); + } + + convert_to_aos(gallivm, io, indices, + outputs, clipmask, + gs_info->num_outputs, gs_type, + FALSE); +} + +static void +draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = 
variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef prim_lengts_ptr = + draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr); + unsigned i; + + for (i = 0; i < bld_base->base.type.length; ++i) { + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + LLVMValueRef prims_emitted = + LLVMBuildExtractElement(builder, emitted_prims_vec, ind, ""); + LLVMValueRef store_ptr; + LLVMValueRef num_vertices = + LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, ""); + + /*lp_build_printf(gallivm, "XXXX emitting vertices, %d\n\n", + num_vertices);*/ + + store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, ""); + store_ptr = LLVMBuildLoad(builder, store_ptr, ""); + store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, ""); + LLVMBuildStore(builder, num_vertices, store_ptr); + } +} + +static void +draw_gs_llvm_epilogue(struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef emitted_verts_ptr = + draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr); + LLVMValueRef emitted_prims_ptr = + draw_gs_jit_emitted_prims(gallivm, variant->context_ptr); + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + + emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, ""); + emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, ""); + + LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr); + LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr); +} static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, @@ -1323,7 +1501,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, &true_index, 1, ""); true_index = LLVMBuildLoad(builder, 
fetch_ptr, "fetch_elt"); } - + system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder, system_values.vertex_id, true_index, lp_build_const_int32(gallivm, i), ""); @@ -1387,11 +1565,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0); } - /* store clipmask in vertex header, - * original positions in clip - * and transformed positions in data - */ - convert_to_aos(gallivm, io, outputs, clipmask, + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(gallivm, io, NULL, outputs, clipmask, vs_info->num_outputs, vs_type, have_clipdist); } @@ -1437,8 +1615,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; + key->has_gs = llvm->draw->gs.geometry_shader != NULL; key->pad1 = 0; - key->pad2 = 0; /* All variants of this shader will have the same value for * nr_samplers. 
Not yet trying to compact away holes in the @@ -1487,6 +1665,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) debug_printf("bypass_viewport = %u\n", key->bypass_viewport); debug_printf("clip_halfz = %u\n", key->clip_halfz); debug_printf("need_edgeflags = %u\n", key->need_edgeflags); + debug_printf("has_gs = %u\n", key->has_gs); debug_printf("ucp_enable = %u\n", key->ucp_enable); for (i = 0 ; i < key->nr_vertex_elements; i++) { @@ -1504,6 +1683,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) void draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned shader_stage, unsigned sview_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t first_level, uint32_t last_level, @@ -1515,9 +1695,18 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned j; struct draw_jit_texture *jit_tex; - assert(sview_idx < Elements(draw->llvm->jit_context.textures)); + assert(shader_stage == PIPE_SHADER_VERTEX || + shader_stage == PIPE_SHADER_GEOMETRY); + + if (shader_stage == PIPE_SHADER_VERTEX) { + assert(sview_idx < Elements(draw->llvm->jit_context.textures)); - jit_tex = &draw->llvm->jit_context.textures[sview_idx]; + jit_tex = &draw->llvm->jit_context.textures[sview_idx]; + } else if (shader_stage == PIPE_SHADER_GEOMETRY) { + assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures)); + + jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx]; + } jit_tex->width = width; jit_tex->height = height; @@ -1551,6 +1740,19 @@ draw_llvm_set_sampler_state(struct draw_context *draw) COPY_4V(jit_sam->border_color, s->border_color.f); } } + + for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) { + struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i]; + + if (draw->samplers[i]) { + const struct pipe_sampler_state *s + = draw->samplers[PIPE_SHADER_GEOMETRY][i]; + jit_sam->min_lod = s->min_lod; + jit_sam->max_lod = s->max_lod; + jit_sam->lod_bias = s->lod_bias; + 
COPY_4V(jit_sam->border_color, s->border_color.f); + } + } } @@ -1577,3 +1779,298 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant) llvm->nr_variants--; FREE(variant); } + + +/** + * Create LLVM types for various structures. + */ +static void +create_gs_jit_types(struct draw_gs_llvm_variant *var) +{ + struct gallivm_state *gallivm = var->gallivm; + LLVMTypeRef texture_type, sampler_type, context_type; + + texture_type = create_jit_texture_type(gallivm, "texture"); + sampler_type = create_jit_sampler_type(gallivm, "sampler"); + + context_type = create_gs_jit_context_type(gallivm, + var->shader->base.vector_length, + texture_type, sampler_type, + "draw_gs_jit_context"); + var->context_ptr_type = LLVMPointerType(context_type, 0); + + var->input_array_type = create_gs_jit_input_type(gallivm); +} + +static LLVMTypeRef +get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant) +{ + if (!variant->context_ptr_type) + create_gs_jit_types(variant); + return variant->context_ptr_type; +} + +static LLVMValueRef +generate_mask_value(struct draw_gs_llvm_variant *variant, + struct lp_type gs_type) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bits[16]; + struct lp_type mask_type = lp_int_type(gs_type); + struct lp_type mask_elem_type = lp_elem_type(mask_type); + LLVMValueRef mask_val = lp_build_const_vec(gallivm, + mask_type, + 0); + unsigned i; + + assert(gs_type.length <= Elements(bits)); + + for (i = gs_type.length; i >= 1; --i) { + int idx = i - 1; + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + bits[idx] = lp_build_compare(gallivm, + mask_elem_type, PIPE_FUNC_GEQUAL, + variant->num_prims, ind); + } + for (i = 0; i < gs_type.length; ++i) { + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, ""); + } + mask_val = lp_build_compare(gallivm, + mask_type, PIPE_FUNC_NOTEQUAL, + mask_val, + 
lp_build_const_int_vec(gallivm, mask_type, 0)); + + return mask_val; +} + +static void +draw_gs_llvm_generate(struct draw_llvm *llvm, + struct draw_gs_llvm_variant *variant) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); + LLVMTypeRef arg_types[5]; + LLVMTypeRef func_type; + LLVMValueRef variant_func; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef io_ptr, input_array, num_prims, mask_val; + struct lp_build_sampler_soa *sampler = 0; + struct lp_build_context bld; + struct lp_bld_tgsi_system_values system_values; + struct lp_type gs_type; + unsigned i; + struct lp_build_tgsi_gs_iface gs_iface; + const struct tgsi_token *tokens = variant->shader->base.state.tokens; + LLVMValueRef consts_ptr; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + struct lp_build_mask_context mask; + + memset(&system_values, 0, sizeof(system_values)); + + assert(variant->vertex_header_ptr_type); + + arg_types[0] = get_gs_context_ptr_type(variant); /* context */ + arg_types[1] = variant->input_array_type; /* input */ + arg_types[2] = variant->vertex_header_ptr_type; /* vertex_header */ + arg_types[3] = int32_type; /* num_prims */ + arg_types[4] = int32_type; /* instance_id */ + + func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); + + variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader", + func_type); + variant->function = variant_func; + + LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); + + for (i = 0; i < Elements(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant_func, i), + LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant_func, 0); + input_array = LLVMGetParam(variant_func, 1); + io_ptr = LLVMGetParam(variant_func, 2); + num_prims = LLVMGetParam(variant_func, 3); + 
system_values.instance_id = LLVMGetParam(variant_func, 4); + + lp_build_name(context_ptr, "context"); + lp_build_name(input_array, "input"); + lp_build_name(io_ptr, "io"); + lp_build_name(num_prims, "num_prims"); + lp_build_name(system_values.instance_id, "instance_id"); + + variant->context_ptr = context_ptr; + variant->io_ptr = io_ptr; + variant->num_prims = num_prims; + + gs_iface.input = input_array; + gs_iface.emit_vertex = draw_gs_llvm_emit_vertex; + gs_iface.end_primitive = draw_gs_llvm_end_primitive; + gs_iface.gs_epilogue = draw_gs_llvm_epilogue; + gs_iface.user_data = variant; + + /* + * Function body + */ + + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); + builder = gallivm->builder; + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, gallivm, lp_type_int(32)); + + memset(&gs_type, 0, sizeof gs_type); + gs_type.floating = TRUE; /* floating point values */ + gs_type.sign = TRUE; /* values are signed */ + gs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + gs_type.width = 32; /* 32-bit float */ + gs_type.length = variant->shader->base.vector_length; + + consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr); + + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.samplers, + context_ptr); + + mask_val = generate_mask_value(variant, gs_type); + lp_build_mask_begin(&mask, gallivm, gs_type, mask_val); + + lp_build_tgsi_soa(variant->gallivm, + tokens, + gs_type, + &mask, + consts_ptr, + &system_values, + NULL /*pos*/, + NULL, + outputs, + sampler, + &llvm->draw->gs.geometry_shader->info, + &gs_iface); + + lp_build_mask_end(&mask); + + LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32))); + + gallivm_verify_function(gallivm, variant_func); +} + + +struct draw_gs_llvm_variant * +draw_gs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_outputs, + const struct draw_gs_llvm_variant_key *key) +{ + struct
draw_gs_llvm_variant *variant; + struct llvm_geometry_shader *shader = + llvm_geometry_shader(llvm->draw->gs.geometry_shader); + LLVMTypeRef vertex_header; + + variant = MALLOC(sizeof *variant + + shader->variant_key_size - + sizeof variant->key); + if (variant == NULL) + return NULL; + + variant->llvm = llvm; + variant->shader = shader; + + variant->gallivm = gallivm_create(); + + create_gs_jit_types(variant); + + memcpy(&variant->key, key, shader->variant_key_size); + + vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs); + + variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); + + draw_gs_llvm_generate(llvm, variant); + + gallivm_compile_module(variant->gallivm); + + variant->jit_func = (draw_gs_jit_func) + gallivm_jit_function(variant->gallivm, variant->function); + + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + + return variant; +} + +void +draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + + if (variant->function) { + gallivm_free_function(variant->gallivm, + variant->function, variant->jit_func); + } + + gallivm_destroy(variant->gallivm); + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_gs_variants--; + FREE(variant); +} + +struct draw_gs_llvm_variant_key * +draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store) +{ + unsigned i; + struct draw_gs_llvm_variant_key *key; + struct draw_sampler_static_state *draw_sampler; + + key = (struct draw_gs_llvm_variant_key *)store; + + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. 
+ */ + key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key->nr_sampler_views = + llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + } + else { + key->nr_sampler_views = key->nr_samplers; + } + + draw_sampler = key->samplers; + + memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); + + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, + llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]); + } + for (i = 0 ; i < key->nr_sampler_views; i++) { + lp_sampler_static_texture_state(&draw_sampler[i].texture_state, + llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]); + } + + return key; +} + +void +draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key) +{ + unsigned i; + struct draw_sampler_static_state *sampler = key->samplers; + + for (i = 0 ; i < key->nr_sampler_views; i++) { + debug_printf("sampler[%i].src_format = %s\n", i, + util_format_name(sampler[i].texture_state.format)); + } +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index c03c69e57b3..fc0d2bd4bf1 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -31,6 +31,8 @@ #include "draw/draw_private.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" + #include "gallivm/lp_bld_sample.h" #include "gallivm/lp_bld_limits.h" @@ -40,6 +42,7 @@ struct draw_llvm; struct llvm_vertex_shader; +struct llvm_geometry_shader; struct draw_jit_texture { @@ -166,6 +169,61 @@ struct draw_jit_context lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") +/** + * This structure is passed directly to the generated geometry shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_gs_jit_context_* macros. 
+ * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_gs_jit_context +{ + const float *constants[LP_MAX_TGSI_CONST_BUFFERS]; + float (*planes) [DRAW_TOTAL_CLIP_PLANES][4]; + float *viewport; + + int **prim_lengths; + int *emitted_vertices; + int *emitted_prims; + + struct draw_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct draw_jit_sampler samplers[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_gs_jit_context_constants(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, 0, "constants") + +#define draw_gs_jit_context_planes(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 1, "planes") + +#define draw_gs_jit_context_viewport(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 2, "viewport") + +#define draw_gs_jit_prim_lengths(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 3, "prim_lengths") + +#define draw_gs_jit_emitted_vertices(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 4, "emitted_vertices") + +#define draw_gs_jit_emitted_prims(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 5, "emitted_prims") + +#define DRAW_GS_JIT_CTX_TEXTURES 6 +#define DRAW_GS_JIT_CTX_SAMPLERS 7 + +#define draw_gs_jit_context_textures(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_TEXTURES, "textures") + +#define draw_gs_jit_context_samplers(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_SAMPLERS, "samplers") + + + typedef int (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, @@ -187,6 +245,14 @@ typedef int struct pipe_vertex_buffer *vertex_buffers, unsigned instance_id); + +typedef int +(*draw_gs_jit_func)(struct draw_gs_jit_context *context, + float inputs[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS], + struct vertex_header *output, + unsigned num_prims, + unsigned instance_id); + 
struct draw_llvm_variant_key { unsigned nr_vertex_elements:8; @@ -199,13 +265,13 @@ struct draw_llvm_variant_key unsigned clip_halfz:1; unsigned bypass_viewport:1; unsigned need_edgeflags:1; + unsigned has_gs:1; /* * it is important there are no holes in this struct * (and all padding gets zeroed). */ - unsigned pad1:1; unsigned ucp_enable:PIPE_MAX_CLIP_PLANES; - unsigned pad2:32-PIPE_MAX_CLIP_PLANES; + unsigned pad1:32-PIPE_MAX_CLIP_PLANES; /* Variable number of vertex elements: */ @@ -216,11 +282,23 @@ struct draw_llvm_variant_key /* struct draw_sampler_static_state sampler; */ }; +struct draw_gs_llvm_variant_key +{ + unsigned nr_samplers:8; + unsigned nr_sampler_views:8; + + struct draw_sampler_static_state samplers[1]; +}; + #define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \ (sizeof(struct draw_llvm_variant_key) + \ PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state) + \ (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element)) +#define DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE \ + (sizeof(struct draw_gs_llvm_variant_key) + \ + PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state)) + static INLINE size_t draw_llvm_variant_key_size(unsigned nr_vertex_elements, @@ -232,6 +310,14 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements, } +static INLINE size_t +draw_gs_llvm_variant_key_size(unsigned nr_samplers) +{ + return (sizeof(struct draw_gs_llvm_variant_key) + + (nr_samplers - 1) * sizeof(struct draw_sampler_static_state)); +} + + static INLINE struct draw_sampler_static_state * draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key) { @@ -246,6 +332,13 @@ struct draw_llvm_variant_list_item struct draw_llvm_variant_list_item *next, *prev; }; +struct draw_gs_llvm_variant_list_item +{ + struct draw_gs_llvm_variant *base; + struct draw_gs_llvm_variant_list_item *next, *prev; +}; + + struct draw_llvm_variant { struct gallivm_state *gallivm; @@ -271,6 +364,32 @@ struct draw_llvm_variant struct draw_llvm_variant_key key; }; + 
+struct draw_gs_llvm_variant +{ + struct gallivm_state *gallivm; + + /* LLVM JIT builder types */ + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef input_array_type; + + LLVMValueRef context_ptr; + LLVMValueRef io_ptr; + LLVMValueRef num_prims; + LLVMValueRef function; + draw_gs_jit_func jit_func; + + struct llvm_geometry_shader *shader; + + struct draw_llvm *llvm; + struct draw_gs_llvm_variant_list_item list_item_global; + struct draw_gs_llvm_variant_list_item list_item_local; + + /* key is variable-sized, must be last */ + struct draw_gs_llvm_variant_key key; +}; + struct llvm_vertex_shader { struct draw_vertex_shader base; @@ -280,13 +399,27 @@ struct llvm_vertex_shader { unsigned variants_cached; }; +struct llvm_geometry_shader { + struct draw_geometry_shader base; + + unsigned variant_key_size; + struct draw_gs_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; + + struct draw_llvm { struct draw_context *draw; struct draw_jit_context jit_context; + struct draw_gs_jit_context gs_jit_context; struct draw_llvm_variant_list_item vs_variants_list; int nr_variants; + + struct draw_gs_llvm_variant_list_item gs_variants_list; + int nr_gs_variants; }; @@ -296,6 +429,14 @@ llvm_vertex_shader(struct draw_vertex_shader *vs) return (struct llvm_vertex_shader *)vs; } +static INLINE struct llvm_geometry_shader * +llvm_geometry_shader(struct draw_geometry_shader *gs) +{ + return (struct llvm_geometry_shader *)gs; +} + + + struct draw_llvm * draw_llvm_create(struct draw_context *draw); @@ -317,6 +458,21 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store); void draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key); + +struct draw_gs_llvm_variant * +draw_gs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_vertex_header_attribs, + const struct draw_gs_llvm_variant_key *key); + +void +draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant); + +struct 
draw_gs_llvm_variant_key * +draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store); + +void +draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key); + struct lp_build_sampler_soa * draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state, LLVMValueRef context_ptr); @@ -326,6 +482,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw); void draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned shader_stage, unsigned sview_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t first_level, uint32_t last_level, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index fab168c6d47..ec0f758d453 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -57,6 +57,71 @@ struct llvm_middle_end { }; +static void +llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) +{ + struct draw_context *draw = fpme->draw; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_gs_llvm_variant_key *key; + struct draw_gs_llvm_variant *variant = NULL; + struct draw_gs_llvm_variant_list_item *li; + struct llvm_geometry_shader *shader = llvm_geometry_shader(gs); + char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE]; + unsigned i; + + key = draw_gs_llvm_make_variant_key(fpme->llvm, store); + + /* Search shader's list of variants for the key */ + li = first_elem(&shader->variants); + while (!at_end(&shader->variants, li)) { + if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) { + variant = li->base; + break; + } + li = next_elem(li); + } + + if (variant) { + /* found the variant, move to head of global list (for LRU) */ + move_to_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + } + else { + /* Need to create new variant */ + + /* First check if we've created too many variants. 
If so, free + * 25% of the LRU to avoid using too much memory. + */ + if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_gs_llvm_variant_list_item *item; + if (is_empty_list(&fpme->llvm->gs_variants_list)) { + break; + } + item = last_elem(&fpme->llvm->gs_variants_list); + assert(item); + assert(item->base); + draw_gs_llvm_destroy_variant(item->base); + } + } + + variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key); + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + fpme->llvm->nr_gs_variants++; + shader->variants_cached++; + } + } + + gs->current_variant = variant; +} + /** * Prepare/validate middle part of the vertex pipeline. * NOTE: if you change this function, also look at the non-LLVM @@ -180,6 +245,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, fpme->current_variant = variant; } + + if (gs) { + llvm_middle_end_prepare_gs(fpme); + } } @@ -199,11 +268,17 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) { fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; } + for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) { + fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; + } fpme->llvm->jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; + fpme->llvm->gs_jit_context.planes = + (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; fpme->llvm->jit_context.viewport = (float *) draw->viewport.scale; + fpme->llvm->gs_jit_context.viewport = (float *) draw->viewport.scale; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 6e65e126d68..62d4707371f 
100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -61,6 +61,7 @@ struct tgsi_shader_info; struct lp_build_mask_context; struct gallivm_state; struct lp_derivatives; +struct lp_build_tgsi_gs_iface; enum lp_build_tex_modifier { @@ -224,7 +225,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], struct lp_build_sampler_soa *sampler, - const struct tgsi_shader_info *info); + const struct tgsi_shader_info *info, + const struct lp_build_tgsi_gs_iface *gs_iface); void @@ -361,6 +363,24 @@ struct lp_build_tgsi_context void (*emit_epilogue)(struct lp_build_tgsi_context*); }; +struct lp_build_tgsi_gs_iface +{ + LLVMValueRef input; + void (*emit_vertex)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec, + void *user_data); + void (*end_primitive)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec, + void *user_data); + void (*gs_epilogue)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec, + void *user_data); + void *user_data; +}; + struct lp_build_tgsi_soa_context { struct lp_build_tgsi_context bld_base; @@ -368,6 +388,11 @@ struct lp_build_tgsi_soa_context /* Builder for scalar elements of shader's data type (float) */ struct lp_build_context elem_bld; + const struct lp_build_tgsi_gs_iface *gs_iface; + LLVMValueRef emitted_prims_vec; + LLVMValueRef total_emitted_vertices_vec; + LLVMValueRef emitted_vertices_vec; + LLVMValueRef consts_ptr; const LLVMValueRef *pos; const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index cafc61f60f6..6f174a5b50e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -437,6 
+437,26 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, } } +/* + * If we have indirect addressing in outputs copy our alloca array + * to the outputs slots specified by the caller to make sure + * our outputs are delivered consistently via the same interface. + */ +static void +gather_outputs(struct lp_build_tgsi_soa_context * bld) +{ + if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + unsigned index, chan; + assert(bld->bld_base.info->num_outputs <= + bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); + for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); + } + } + } +} + /** * Gather vector. * XXX the lp_build_gather() function should be capable of doing this @@ -757,6 +777,60 @@ emit_fetch_input( return res; } + +static LLVMValueRef +emit_fetch_gs_input( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + enum tgsi_opcode_type stype, + unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + //struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef attrib_index = NULL; + LLVMValueRef vertex_index = NULL; + LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); + LLVMValueRef indices[3]; + LLVMValueRef res; + + if (reg->Register.Indirect) { + attrib_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + ®->Indirect); + } else { + attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); + } + + if (reg->Dimension.Indirect) { + vertex_index = get_indirect_index(bld, + reg->Register.File, + reg->Dimension.Index, + ®->DimIndirect); + } else { + vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); + } + + indices[0] = vertex_index; + indices[1] = attrib_index; 
+ indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, bld->gs_iface->input, indices, 3, ""); + res = LLVMBuildLoad(builder, res, ""); + + assert(res); + + if (stype == TGSI_TYPE_UNSIGNED) { + res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); + } else if (stype == TGSI_TYPE_SIGNED) { + res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); + } + + return res; +} + static LLVMValueRef emit_fetch_temporary( struct lp_build_tgsi_context * bld_base, @@ -2081,6 +2155,66 @@ sviewinfo_emit( emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); } +static LLVMValueRef +mask_to_one_vec(struct lp_build_tgsi_context *bld_base) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef one_vec = bld_base->int_bld.one; + struct lp_exec_mask *exec_mask = &bld->exec_mask; + + if (exec_mask->has_mask) { + one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, ""); + } + one_vec = LLVMBuildAnd(builder, one_vec, + lp_build_mask_value(bld->mask), ""); + return one_vec; +} + +static void +emit_vertex( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + + if (bld->gs_iface->emit_vertex) { + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); + gather_outputs(bld); + bld->gs_iface->emit_vertex(&bld->bld_base, bld->outputs, + bld->total_emitted_vertices_vec, + bld->gs_iface->user_data); + bld->emitted_vertices_vec = + LLVMBuildAdd(builder, bld->emitted_vertices_vec, masked_ones, ""); + bld->total_emitted_vertices_vec = + LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, masked_ones, ""); + } +} + + +static void +end_primitive( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + 
struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + + if (bld->gs_iface->end_primitive) { + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); + bld->gs_iface->end_primitive(&bld->bld_base, + bld->emitted_vertices_vec, + bld->emitted_prims_vec, + bld->gs_iface->user_data); + bld->emitted_prims_vec = + LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, ""); + bld->emitted_vertices_vec = bld_base->uint_bld.zero; + } +} + static void cal_emit( const struct lp_build_tgsi_action * action, @@ -2324,7 +2458,7 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base) /* If we have indirect addressing in inputs we need to copy them into * our alloca array to be able to iterate over them */ - if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) { unsigned index, chan; LLVMTypeRef vec_type = bld_base->base.vec_type; LLVMValueRef array_size = lp_build_const_int32(gallivm, @@ -2349,6 +2483,13 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base) } } } + + if (bld->gs_iface) { + struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; + bld->emitted_prims_vec = uint_bld->zero; + bld->emitted_vertices_vec = uint_bld->zero; + bld->total_emitted_vertices_vec = uint_bld->zero; + } } static void emit_epilogue(struct lp_build_tgsi_context * bld_base) @@ -2361,16 +2502,14 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base) } /* If we have indirect addressing in outputs we need to copy our alloca array - * to the outputs slots specified by the called */ - if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { - unsigned index, chan; - assert(bld_base->info->num_outputs <= - bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); - for (index = 0; index < bld_base->info->num_outputs; ++index) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 
- bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); - } - } + * to the outputs slots specified by the caller */ + if (bld->gs_iface) { + bld->gs_iface->gs_epilogue(&bld->bld_base, + bld->total_emitted_vertices_vec, + bld->emitted_prims_vec, + bld->gs_iface->user_data); + } else { + gather_outputs(bld); } } @@ -2385,7 +2524,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], struct lp_build_sampler_soa *sampler, - const struct tgsi_shader_info *info) + const struct tgsi_shader_info *info, + const struct lp_build_tgsi_gs_iface *gs_iface) { struct lp_build_tgsi_soa_context bld; @@ -2463,6 +2603,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; + if (gs_iface) { + /* inputs are always indirect with gs */ + bld.indirect_files |= (1 << TGSI_FILE_INPUT); + bld.gs_iface = gs_iface; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; + bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; + bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; + } + lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); bld.system_values = *system_values; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index de51f39f159..ea41bd64720 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -354,7 +354,7 @@ generate_fs(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, &system_values, interp->pos, interp->inputs, - outputs, sampler, &shader->info.base); + outputs, sampler, &shader->info.base, NULL); /* Alpha test */ if (key->alpha.enabled) { @@ -607,7 +607,7 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, 
consts_ptr, &system_values, interp->pos, interp->inputs, - outputs, sampler, &shader->info.base); + outputs, sampler, &shader->info.base, NULL); /* Alpha test */ if (key->alpha.enabled) {