draw: unify linear and elts draw jit functions
authorRoland Scheidegger <sroland@vmware.com>
Sun, 13 Nov 2016 15:32:24 +0000 (16:32 +0100)
committerRoland Scheidegger <sroland@vmware.com>
Mon, 21 Nov 2016 19:02:53 +0000 (20:02 +0100)
The code for elts and linear paths was nearly 100% identical by now - with
the elts path simply having some additional gather for the elements in the
main loop (with some additional small differences before the main loop).

Hence nuke the separate functions and decide this at jit shader execution
time (simply based on the presence of the elts pointer).

Some analysis shows that the generated vs jit functions seem to be just very
minimally more complex than the former elts functions, and almost none of the
additional complexity is in the main loop (basically just the branch logic
for the branch fetching the actual indices).
Compared to linear, the codesize of the function is of course a bit larger,
however the actual executed code in the main loop appears to be near 100%
identical (the additional code looking up indices is skipped as expected).

So, I would not expect a (meaningful) performance difference with the
generated code, neither with elts nor linear, this does however roughly
half the compilation time (the compiled shaders should also use only half
the memory of course).

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/draw/draw_llvm.c
src/gallium/auxiliary/draw/draw_llvm.h
src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c

index a02c893fb6e09b86cdcc10a2aa3558b62c0a9e41..2478b1157cd9637a6c5842beccd96e527dd60101 100644 (file)
@@ -63,8 +63,7 @@
 
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
-                   boolean elts);
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
 
 
 struct draw_gs_llvm_iface {
@@ -577,17 +576,13 @@ draw_llvm_create_variant(struct draw_llvm *llvm,
 
    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
 
-   draw_llvm_generate(llvm, variant, FALSE);  /* linear */
-   draw_llvm_generate(llvm, variant, TRUE);   /* elts */
+   draw_llvm_generate(llvm, variant);
 
    gallivm_compile_module(variant->gallivm);
 
    variant->jit_func = (draw_jit_vert_func)
          gallivm_jit_function(variant->gallivm, variant->function);
 
-   variant->jit_func_elts = (draw_jit_vert_func_elts)
-         gallivm_jit_function(variant->gallivm, variant->function_elts);
-
    gallivm_free_ir(variant->gallivm);
 
    variant->list_item_global.base = variant;
@@ -1565,25 +1560,23 @@ draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
 }
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
-                   boolean elts)
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
 {
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMContextRef context = gallivm->context;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
    LLVMTypeRef arg_types[11];
-   unsigned num_arg_types =
-      elts ? ARRAY_SIZE(arg_types) : ARRAY_SIZE(arg_types) - 1;
+   unsigned num_arg_types = ARRAY_SIZE(arg_types);
    LLVMTypeRef func_type;
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    char func_name[64];
    struct lp_type vs_type;
-   LLVMValueRef count, fetch_elts, fetch_elt_max;
-   LLVMValueRef vertex_id_offset, start_instance, start;
+   LLVMValueRef count, fetch_elts, start_or_maxelt, start;
+   LLVMValueRef vertex_id_offset, start_instance;
    LLVMValueRef stride, step, io_itr;
-   LLVMValueRef ind_vec;
+   LLVMValueRef ind_vec, ind_vec_store, have_elts, fetch_max, tmp;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
@@ -1597,9 +1590,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    unsigned i, j;
    struct lp_build_context bld, bldivec, blduivec;
    struct lp_build_loop_state lp_loop;
+   struct lp_build_if_state if_ctx;
    const int vector_length = lp_native_vector_width / 32;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-   LLVMValueRef fetch_max;
    struct lp_build_sampler_soa *sampler = 0;
    LLVMValueRef ret, clipmask_bool_ptr;
    struct draw_llvm_variant_key *key = &variant->key;
@@ -1624,8 +1617,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
 
    memset(&system_values, 0, sizeof(system_values));
 
-   util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u_%s",
-                 variant->shader->variants_cached, elts ? "elts" : "linear");
+   util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u",
+                 variant->shader->variants_cached);
 
    i = 0;
    arg_types[i++] = get_context_ptr_type(variant);       /* context */
@@ -1638,19 +1631,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    arg_types[i++] = int32_type;                          /* instance_id */
    arg_types[i++] = int32_type;                          /* vertex_id_offset */
    arg_types[i++] = int32_type;                          /* start_instance */
-   if (elts) {
-      arg_types[i++] = LLVMPointerType(int32_type, 0);   /* fetch_elts  */
-   }
+   arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
 
    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
                                 arg_types, num_arg_types, 0);
 
    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
-
-   if (elts)
-      variant->function_elts = variant_func;
-   else
-      variant->function = variant_func;
+   variant->function = variant_func;
 
    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
    for (i = 0; i < num_arg_types; ++i)
@@ -1661,6 +1648,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    io_ptr                    = LLVMGetParam(variant_func, 1);
    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
    count                     = LLVMGetParam(variant_func, 3);
+   start_or_maxelt           = LLVMGetParam(variant_func, 4);
    /*
     * XXX: stride is actually unused. The stride we use is strictly calculated
     * from the number of outputs (including the draw_extra outputs).
@@ -1672,29 +1660,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
    system_values.instance_id = LLVMGetParam(variant_func, 7);
    vertex_id_offset          = LLVMGetParam(variant_func, 8);
    start_instance            = LLVMGetParam(variant_func, 9);
+   fetch_elts                = LLVMGetParam(variant_func, 10);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
    lp_build_name(vbuffers_ptr, "vbuffers");
    lp_build_name(count, "count");
+   lp_build_name(start_or_maxelt, "start_or_maxelt");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
    lp_build_name(system_values.instance_id, "instance_id");
    lp_build_name(vertex_id_offset, "vertex_id_offset");
    lp_build_name(start_instance, "start_instance");
-
-   if (elts) {
-      fetch_elt_max = LLVMGetParam(variant_func, 4);
-      fetch_elts    = LLVMGetParam(variant_func, 10);
-      lp_build_name(fetch_elts, "fetch_elts");
-      lp_build_name(fetch_elt_max, "fetch_elt_max");
-      start = NULL;
-   }
-   else {
-      start        = LLVMGetParam(variant_func, 4);
-      lp_build_name(start, "start");
-      fetch_elts = NULL;
-   }
+   lp_build_name(fetch_elts, "fetch_elts");
 
    /*
     * Function body
@@ -1735,19 +1713,30 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
    }
 
+   fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max");
+   ind_vec_store = lp_build_alloca(gallivm, bldivec.vec_type, "ind_vec");
 
-   if (elts) {
-      fetch_max = count;
-      start = blduivec.zero;
+   have_elts = LLVMBuildICmp(builder, LLVMIntNE,
+                             LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
+   lp_build_if(&if_ctx, gallivm, have_elts);
+   {
+      LLVMBuildStore(builder, ind_vec, ind_vec_store);
+      LLVMBuildStore(builder, count, fetch_max);
    }
-   else {
-      fetch_max = lp_build_add(&bld, start, count);
-      start = lp_build_broadcast_scalar(&blduivec, start);
-      ind_vec = lp_build_add(&blduivec, start, ind_vec);
+   lp_build_else(&if_ctx);
+   {
+      tmp = lp_build_add(&bld, count, start_or_maxelt);
+      LLVMBuildStore(builder, tmp, fetch_max);
+      start = lp_build_broadcast_scalar(&bldivec, start_or_maxelt);
+      tmp = lp_build_add(&bldivec, start, ind_vec);
+      LLVMBuildStore(builder, tmp, ind_vec_store);
    }
+   lp_build_endif(&if_ctx);
 
+   fetch_max = LLVMBuildLoad(builder, fetch_max, "");
    fetch_max = LLVMBuildSub(builder, fetch_max, bld.one, "fetch_max");
-   fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
+   fetch_max = lp_build_broadcast_scalar(&bldivec, fetch_max);
+   ind_vec = LLVMBuildLoad(builder, ind_vec_store, "");
 
    /*
     * Pre-calculate everything which is constant per shader invocation.
@@ -1847,7 +1836,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
       LLVMValueRef io;
       LLVMValueRef clipmask;   /* holds the clipmask value */
-      LLVMValueRef true_index_array;
+      LLVMValueRef true_index_array, index_store;
       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
 
       io_itr = lp_loop.counter;
@@ -1875,8 +1864,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
        */
       true_index_array = lp_build_min(&bldivec, true_index_array, fetch_max);
 
-      if (elts) {
+      index_store = lp_build_alloca_undef(gallivm, bldivec.vec_type, "index_store");
+      LLVMBuildStore(builder, true_index_array, index_store);
 
+      lp_build_if(&if_ctx, gallivm, have_elts);
+      {
          /*
           * Note: you'd expect some comparison/clamp against fetch_elt_max
           * here.
@@ -1906,7 +1898,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                                             32, 32, TRUE,
                                             fetch_elts, true_index_array,
                                             FALSE);
+         LLVMBuildStore(builder, true_index_array, index_store);
       }
+      lp_build_endif(&if_ctx);
+
+      true_index_array = LLVMBuildLoad(builder, index_store, "");
 
       for (j = 0; j < key->nr_vertex_elements; ++j) {
          struct pipe_vertex_element *velem = &key->vertex_element[j];
index 8d4b4ae6db4acd3bdc5cc023a539ab55d4dadaf8..2ddf249672e2c44f6f09d69435f0f99252c73d8d 100644 (file)
@@ -266,26 +266,13 @@ typedef boolean
                       struct vertex_header *io,
                       const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS],
                       unsigned count,
-                      unsigned start,
+                      unsigned start_or_maxelt,
                       unsigned stride,
                       struct pipe_vertex_buffer *vertex_buffers,
                       unsigned instance_id,
                       unsigned vertex_id_offset,
-                      unsigned start_instance);
-
-
-typedef boolean
-(*draw_jit_vert_func_elts)(struct draw_jit_context *context,
-                           struct vertex_header *io,
-                           const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS],
-                           unsigned count,
-                           unsigned fetch_max_elt,
-                           unsigned stride,
-                           struct pipe_vertex_buffer *vertex_buffers,
-                           unsigned instance_id,
-                           unsigned vertex_id_offset,
-                           unsigned start_instance,
-                           const unsigned *fetch_elts);
+                      unsigned start_instance,
+                      const unsigned *fetch_elts);
 
 
 typedef int
@@ -393,9 +380,7 @@ struct draw_llvm_variant
    LLVMTypeRef vertex_header_ptr_type;
 
    LLVMValueRef function;
-   LLVMValueRef function_elts;
    draw_jit_vert_func jit_func;
-   draw_jit_vert_func_elts jit_func_elts;
 
    struct llvm_vertex_shader *shader;
 
index 73069c9de65c84987218406df129919309d57672..0277cbfc83a43d61d12fc0ca74f2f0a44417d389 100644 (file)
@@ -354,6 +354,8 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
    boolean free_prim_info = FALSE;
    unsigned opt = fpme->opt;
    boolean clipped = 0;
+   unsigned start_or_maxelt, vid_base;
+   const unsigned *elts;
 
    llvm_vert_info.count = fetch_info->count;
    llvm_vert_info.vertex_size = fpme->vertex_size;
@@ -373,29 +375,27 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
       draw->statistics.vs_invocations += fetch_info->count;
    }
 
-   if (fetch_info->linear)
-      clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context,
-                                       llvm_vert_info.verts,
-                                       draw->pt.user.vbuffer,
-                                       fetch_info->count,
-                                       fetch_info->start,
-                                       fpme->vertex_size,
-                                       draw->pt.vertex_buffer,
-                                       draw->instance_id,
-                                       draw->start_index,
-                                       draw->start_instance);
-   else
-      clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
-                                            llvm_vert_info.verts,
-                                            draw->pt.user.vbuffer,
-                                            fetch_info->count,
-                                            draw->pt.user.eltMax,
-                                            fpme->vertex_size,
-                                            draw->pt.vertex_buffer,
-                                            draw->instance_id,
-                                            draw->pt.user.eltBias,
-                                            draw->start_instance,
-                                            fetch_info->elts);
+   if (fetch_info->linear) {
+      start_or_maxelt = fetch_info->start;
+      vid_base = draw->start_index;
+      elts = NULL;
+   }
+   else {
+      start_or_maxelt = draw->pt.user.eltMax;
+      vid_base = draw->pt.user.eltBias;
+      elts = fetch_info->elts;
+   }
+   clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context,
+                                             llvm_vert_info.verts,
+                                             draw->pt.user.vbuffer,
+                                             fetch_info->count,
+                                             start_or_maxelt,
+                                             fpme->vertex_size,
+                                             draw->pt.vertex_buffer,
+                                             draw->instance_id,
+                                             vid_base,
+                                             draw->start_instance,
+                                             elts);
 
    /* Finished with fetch and vs:
     */