From 0d02a7b8ca794a594c2e9cc5e6d63dc591593105 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Feb 2020 17:09:10 +1000 Subject: [PATCH] draw: add main tessellation code This is the bulk of the llvm shader builders and tessellation execution code. TCS uses a coroutine launcher like compute shaders to handle barriers. It executes 4-wide with one input vertex per lane. Tessellation happens before the TES is run. TES is just a 4-wide launcher, one per primitive is executed, with one lane per tessellation coordinate input. Reviewed-by: Roland Scheidegger Part-of: --- src/gallium/auxiliary/Makefile.sources | 2 + src/gallium/auxiliary/draw/draw_context.c | 31 + src/gallium/auxiliary/draw/draw_context.h | 23 + src/gallium/auxiliary/draw/draw_llvm.c | 1121 ++++++++++++++++++++- src/gallium/auxiliary/draw/draw_llvm.h | 187 +++- src/gallium/auxiliary/draw/draw_private.h | 40 +- src/gallium/auxiliary/draw/draw_tess.c | 630 ++++++++++++ src/gallium/auxiliary/draw/draw_tess.h | 90 ++ src/gallium/auxiliary/meson.build | 2 + 9 files changed, 2119 insertions(+), 7 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_tess.c diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 8b0c19e1731..b79dc1d5958 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -50,6 +50,8 @@ C_SOURCES := \ draw/draw_pt_vsplit_tmp.h \ draw/draw_so_emit_tmp.h \ draw/draw_split_tmp.h \ + draw/draw_tess.c \ + draw/draw_tess.h \ draw/draw_vbuf.h \ draw/draw_vertex.c \ draw/draw_vertex.h \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index fb0b22eae43..70dccda9972 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -746,6 +746,37 @@ draw_total_gs_outputs(const struct draw_context *draw) return info->num_outputs + draw->extra_shader_outputs.num; } +/** + * Return total number of the tess ctrl 
shader outputs. + */ +uint +draw_total_tcs_outputs(const struct draw_context *draw) +{ + const struct tgsi_shader_info *info; + + if (!draw->tcs.tess_ctrl_shader) + return 0; + + info = &draw->tcs.tess_ctrl_shader->info; + + return info->num_outputs; +} + +/** + * Return total number of the tess eval shader outputs. + */ +uint +draw_total_tes_outputs(const struct draw_context *draw) +{ + const struct tgsi_shader_info *info; + + if (!draw->tes.tess_eval_shader) + return 0; + + info = &draw->tes.tess_eval_shader->info; + + return info->num_outputs + draw->extra_shader_outputs.num; +} /** * Provide TGSI sampler objects for vertex/geometry shaders that use diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c58c12cc497..5ad26f1cf48 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -152,6 +152,12 @@ draw_total_vs_outputs(const struct draw_context *draw); uint draw_total_gs_outputs(const struct draw_context *draw); +uint +draw_total_tcs_outputs(const struct draw_context *draw); + +uint +draw_total_tes_outputs(const struct draw_context *draw); + void draw_texture_sampler(struct draw_context *draw, enum pipe_shader_type shader_type, @@ -242,6 +248,23 @@ void draw_bind_geometry_shader(struct draw_context *draw, void draw_delete_geometry_shader(struct draw_context *draw, struct draw_geometry_shader *dvs); +/* + * Tess shader functions + */ +struct draw_tess_ctrl_shader * +draw_create_tess_ctrl_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_tess_ctrl_shader(struct draw_context *draw, + struct draw_tess_ctrl_shader *dvs); +void draw_delete_tess_ctrl_shader(struct draw_context *draw, + struct draw_tess_ctrl_shader *dvs); +struct draw_tess_eval_shader * +draw_create_tess_eval_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_tess_eval_shader(struct draw_context *draw, + struct 
draw_tess_eval_shader *dvs); +void draw_delete_tess_eval_shader(struct draw_context *draw, + struct draw_tess_eval_shader *dvs); /* * Vertex data functions diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 177b83ee659..e7cf576cbd1 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -37,6 +37,7 @@ #include "gallivm/lp_bld_gather.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_coro.h" #include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" @@ -80,6 +81,33 @@ draw_gs_llvm_iface(const struct lp_build_gs_iface *iface) return (const struct draw_gs_llvm_iface *)iface; } +struct draw_tcs_llvm_iface { + struct lp_build_tcs_iface base; + + struct draw_tcs_llvm_variant *variant; + LLVMValueRef input; + LLVMValueRef output; +}; + +static inline const struct draw_tcs_llvm_iface * +draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface) +{ + return (const struct draw_tcs_llvm_iface *)iface; +} + +struct draw_tes_llvm_iface { + struct lp_build_tes_iface base; + + struct draw_tes_llvm_variant *variant; + LLVMValueRef input; +}; + +static inline const struct draw_tes_llvm_iface * +draw_tes_llvm_iface(const struct lp_build_tes_iface *iface) +{ + return (const struct draw_tes_llvm_iface *)iface; +} + /** * Create LLVM type for draw_vertex_buffer. */ @@ -751,6 +779,12 @@ draw_llvm_create(struct draw_context *draw, LLVMContextRef context) llvm->nr_gs_variants = 0; make_empty_list(&llvm->gs_variants_list); + llvm->nr_tcs_variants = 0; + make_empty_list(&llvm->tcs_variants_list); + + llvm->nr_tes_variants = 0; + make_empty_list(&llvm->tes_variants_list); + return llvm; fail: @@ -1827,9 +1861,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) * (though this would be fixable here, but couldn't just broadcast * the values). 
*/ - const boolean bypass_viewport = key->has_gs || key->bypass_viewport || + const boolean bypass_viewport = key->has_gs_or_tes || key->bypass_viewport || vs_info->writes_viewport_index; - const boolean enable_cliptest = !key->has_gs && (key->clip_xy || + const boolean enable_cliptest = !key->has_gs_or_tes && (key->clip_xy || key->clip_z || key->clip_user || key->need_edgeflags); @@ -2265,7 +2299,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) /* XXX assumes edgeflag output not at 0 */ key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; - key->has_gs = llvm->draw->gs.geometry_shader != NULL; + key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL; key->num_outputs = draw_total_vs_outputs(llvm->draw); /* All variants of this shader will have the same value for @@ -2345,7 +2379,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) debug_printf("bypass_viewport = %u\n", key->bypass_viewport); debug_printf("clip_halfz = %u\n", key->clip_halfz); debug_printf("need_edgeflags = %u\n", key->need_edgeflags); - debug_printf("has_gs = %u\n", key->has_gs); + debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes); debug_printf("ucp_enable = %u\n", key->ucp_enable); for (i = 0 ; i < key->nr_vertex_elements; i++) { @@ -2859,3 +2893,1082 @@ draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key) debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format)); } + +static void +create_tcs_jit_types(struct draw_tcs_llvm_variant *var) +{ + struct gallivm_state *gallivm = var->gallivm; + LLVMTypeRef texture_type, sampler_type, image_type, context_type; + + texture_type = create_jit_texture_type(gallivm, "texture"); + sampler_type = create_jit_sampler_type(gallivm, "sampler"); + image_type = create_jit_image_type(gallivm, "image"); + + context_type = 
create_tcs_jit_context_type(gallivm, + 0, + texture_type, sampler_type, + image_type, + "draw_tcs_jit_context"); + var->input_array_type = create_tcs_jit_input_type(gallivm); + var->output_array_type = create_tcs_jit_output_type(gallivm); + var->context_ptr_type = LLVMPointerType(context_type, 0); +} + +static LLVMTypeRef +get_tcs_context_ptr_type(struct draw_tcs_llvm_variant *variant) +{ + if (!variant->context_ptr_type) + create_tcs_jit_types(variant); + return variant->context_ptr_type; +} + +static LLVMValueRef +draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface, + struct lp_build_context *bld, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index) +{ + const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface); + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[3]; + LLVMValueRef res; + struct lp_type type = bld->type; + + if (is_vindex_indirect || is_aindex_indirect) { + int i; + + res = bld->zero; + for (i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef vert_chan_index = vertex_index; + LLVMValueRef attr_chan_index = attrib_index; + LLVMValueRef channel_vec; + + if (is_vindex_indirect) { + vert_chan_index = LLVMBuildExtractElement(builder, + vertex_index, idx, ""); + } + if (is_aindex_indirect) { + attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + + indices[0] = vert_chan_index; + indices[1] = attr_chan_index; + indices[2] = swizzle_index; + + channel_vec = LLVMBuildGEP(builder, tcs->input, indices, 3, ""); + channel_vec = LLVMBuildLoad(builder, channel_vec, ""); + + res = LLVMBuildInsertElement(builder, res, channel_vec, idx, ""); + } + } else { + indices[0] = vertex_index; + indices[1] = attrib_index; + indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, tcs->input, indices, 
3, ""); + res = LLVMBuildLoad(builder, res, ""); + res = lp_build_broadcast_scalar(bld, res); + } + return res; +} + +static LLVMValueRef +draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface, + struct lp_build_context *bld, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index, + uint32_t name) +{ + const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface); + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[3]; + LLVMValueRef res; + struct lp_type type = bld->type; + + if (is_vindex_indirect || is_aindex_indirect) { + int i; + + res = bld->zero; + for (i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef vert_chan_index = vertex_index; + LLVMValueRef attr_chan_index = attrib_index; + LLVMValueRef channel_vec; + + if (is_vindex_indirect) { + vert_chan_index = LLVMBuildExtractElement(builder, + vertex_index, idx, ""); + } + if (is_aindex_indirect) { + attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + + indices[0] = vert_chan_index; + indices[1] = attr_chan_index; + indices[2] = swizzle_index; + + channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, ""); + channel_vec = LLVMBuildLoad(builder, channel_vec, ""); + + res = LLVMBuildInsertElement(builder, res, channel_vec, idx, ""); + } + } else { + indices[0] = vertex_index ? 
vertex_index : lp_build_const_int32(gallivm, 0); + indices[1] = attrib_index; + indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, tcs->output, indices, 3, ""); + res = LLVMBuildLoad(builder, res, ""); + res = lp_build_broadcast_scalar(bld, res); + } + return res; +} + +static void +draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface, + struct lp_build_context *bld, + unsigned name, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index, + LLVMValueRef value, + LLVMValueRef mask_vec) +{ + const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface); + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[3]; + LLVMValueRef res; + struct lp_type type = bld->type; + + if (is_vindex_indirect || is_aindex_indirect) { + int i; + + for (i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef vert_chan_index = vertex_index ? 
vertex_index : lp_build_const_int32(gallivm, 0); + LLVMValueRef attr_chan_index = attrib_index; + LLVMValueRef channel_vec; + + if (is_vindex_indirect) { + vert_chan_index = LLVMBuildExtractElement(builder, + vertex_index, idx, ""); + } + if (is_aindex_indirect) { + attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + + indices[0] = vert_chan_index; + indices[1] = attr_chan_index; + indices[2] = swizzle_index; + + channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, ""); + + res = LLVMBuildExtractElement(builder, value, idx, ""); + + struct lp_build_if_state ifthen; + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, ""); + lp_build_if(&ifthen, gallivm, cond); + LLVMBuildStore(builder, res, channel_vec); + lp_build_endif(&ifthen); + } + } else { + indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0); + indices[1] = attrib_index; + indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, tcs->output, indices, 3, ""); + for (unsigned i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, ""); + + struct lp_build_if_state ifthen; + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, ""); + lp_build_if(&ifthen, gallivm, cond); + LLVMBuildStore(builder, val, res); + lp_build_endif(&ifthen); + } + } +} + + +static LLVMValueRef +generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant, + struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type mask_type = lp_int_type(tcs_type); + LLVMValueRef num_vecs; + 
LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0); + unsigned i; + + num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit); + for (i = 0; i < tcs_type.length; i++) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, ""); + } + mask_val = lp_build_compare(gallivm, mask_type, + PIPE_FUNC_GREATER, num_vecs, mask_val); + + return mask_val; +} + +static void +draw_tcs_llvm_generate(struct draw_llvm *llvm, + struct draw_tcs_llvm_variant *variant) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); + LLVMTypeRef arg_types[6]; + LLVMTypeRef func_type, coro_func_type; + LLVMValueRef variant_func, variant_coro; + LLVMValueRef context_ptr; + LLVMValueRef input_array, output_array, prim_id, patch_vertices_in; + LLVMValueRef mask_val; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + struct lp_build_context bld, bldvec; + struct lp_build_sampler_soa *sampler = 0; + struct lp_build_image_soa *image = NULL; + struct lp_bld_tgsi_system_values system_values; + char func_name[64], func_name_coro[64]; + unsigned i; + struct draw_tcs_llvm_iface tcs_iface; + struct lp_build_mask_context mask; + LLVMValueRef consts_ptr, num_consts_ptr; + LLVMValueRef ssbos_ptr, num_ssbos_ptr; + struct lp_type tcs_type; + unsigned vector_length = variant->shader->base.vector_length; + + memset(&system_values, 0, sizeof(system_values)); + + snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant%u", + variant->shader->variants_cached); + + snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant%u", + variant->shader->variants_cached); + + arg_types[0] = get_tcs_context_ptr_type(variant); /* context */ + arg_types[1] = variant->input_array_type; /* input */ + arg_types[2] = variant->output_array_type; + 
arg_types[3] = int32_type; + arg_types[4] = int32_type; + arg_types[5] = int32_type; /* coroutine only */ + + func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0); + + coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0); + + variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); + + variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type); + + variant->function = variant_func; + LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); + + LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv); + + for (i = 0; i < ARRAY_SIZE(arg_types); ++i) { + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { + lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS); + lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + } + } + + context_ptr = LLVMGetParam(variant_func, 0); + input_array = LLVMGetParam(variant_func, 1); + output_array = LLVMGetParam(variant_func, 2); + prim_id = LLVMGetParam(variant_func, 3); + patch_vertices_in = LLVMGetParam(variant_func, 4); + + lp_build_name(context_ptr, "context"); + lp_build_name(input_array, "input"); + lp_build_name(output_array, "output"); + lp_build_name(prim_id, "prim_id"); + lp_build_name(patch_vertices_in, "patch_vertices_in"); + + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); + builder = gallivm->builder; + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, gallivm, lp_type_int(32)); + + memset(&tcs_type, 0, sizeof tcs_type); + tcs_type.floating = TRUE; /* floating point values */ + tcs_type.sign = TRUE; /* values are signed */ + tcs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + tcs_type.width = 32; /* 32-bit float */ + tcs_type.length = vector_length; + + lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type)); + + LLVMValueRef count = lp_build_const_int32(gallivm, 
variant->shader->base.vertices_out); + LLVMValueRef step = lp_build_const_int32(gallivm, vector_length); + + struct lp_build_loop_state loop_state[2]; + LLVMValueRef num_inner_loop; + unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length); + num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length); + LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls"); + unsigned end_coroutine = INT_MAX; + lp_build_loop_begin(&loop_state[1], gallivm, + lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */ + lp_build_loop_begin(&loop_state[0], gallivm, + lp_build_const_int32(gallivm, 0)); /* inner loop */ + { + LLVMValueRef args[6]; + args[0] = context_ptr; + args[1] = input_array; + args[2] = output_array; + args[3] = prim_id; + args[4] = patch_vertices_in; + args[5] = loop_state[0].counter; + LLVMValueRef coro_entry = LLVMBuildGEP(builder, coro_hdls, &loop_state[0].counter, 1, ""); + LLVMValueRef coro_hdl = LLVMBuildLoad(builder, coro_entry, "coro_hdl"); + + struct lp_build_if_state ifstate; + LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter, + lp_build_const_int32(gallivm, 0), ""); + /* first time here - call the coroutine function entry point */ + lp_build_if(&ifstate, gallivm, cmp); + LLVMValueRef coro_ret = LLVMBuildCall(builder, variant_coro, args, 6, ""); + LLVMBuildStore(builder, coro_ret, coro_entry); + lp_build_else(&ifstate); + /* subsequent calls for this invocation - check if done. 
*/ + LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl); + struct lp_build_if_state ifstate2; + lp_build_if(&ifstate2, gallivm, coro_done); + /* if done destroy and force loop exit */ + lp_build_coro_destroy(gallivm, coro_hdl); + lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1)); + lp_build_else(&ifstate2); + /* otherwise resume the coroutine */ + lp_build_coro_resume(gallivm, coro_hdl); + lp_build_endif(&ifstate2); + lp_build_endif(&ifstate); + lp_build_loop_force_reload_counter(&loop_state[1]); + } + lp_build_loop_end_cond(&loop_state[0], + num_inner_loop, + NULL, LLVMIntUGE); + lp_build_loop_end_cond(&loop_state[1], + lp_build_const_int32(gallivm, end_coroutine), + NULL, LLVMIntEQ); + LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32))); + + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry"); + LLVMPositionBuilderAtEnd(builder, block); + + context_ptr = LLVMGetParam(variant_coro, 0); + input_array = LLVMGetParam(variant_coro, 1); + output_array = LLVMGetParam(variant_coro, 2); + prim_id = LLVMGetParam(variant_coro, 3); + patch_vertices_in = LLVMGetParam(variant_coro, 4); + + consts_ptr = draw_tcs_jit_context_constants(variant->gallivm, context_ptr); + num_consts_ptr = + draw_tcs_jit_context_num_constants(variant->gallivm, context_ptr); + + ssbos_ptr = draw_tcs_jit_context_ssbos(variant->gallivm, context_ptr); + num_ssbos_ptr = + draw_tcs_jit_context_num_ssbos(variant->gallivm, context_ptr); + sampler = draw_llvm_sampler_soa_create(variant->key.samplers); + image = draw_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key)); + + LLVMValueRef counter = LLVMGetParam(variant_coro, 5); + LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length)); + for (i = 0; i < vector_length; i++) { + LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), lp_build_const_int32(gallivm, i), ""); + invocvec = 
LLVMBuildInsertElement(builder, invocvec, idx, lp_build_const_int32(gallivm, i), ""); /* lane i receives invocation id counter * step + i */ + } + + system_values.invocation_id = invocvec; + system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id); + system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in); + tcs_iface.input = input_array; + tcs_iface.output = output_array; + tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input; + tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output; + tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output; + + + { + LLVMValueRef coro_id = lp_build_coro_id(gallivm); + LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id); + + mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, "")); + lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val); + + struct lp_build_coro_suspend_info coro_info; + + LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend"); + LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup"); + + coro_info.suspend = sus_block; + coro_info.cleanup = clean_block; + + struct lp_build_tgsi_params params; + memset(&params, 0, sizeof(params)); + + params.type = tcs_type; + params.mask = &mask; + params.consts_ptr = consts_ptr; + params.const_sizes_ptr = num_consts_ptr; + params.system_values = &system_values; + params.context_ptr = context_ptr; + params.sampler = sampler; + params.info = &llvm->draw->tcs.tess_ctrl_shader->info; + params.ssbo_ptr = ssbos_ptr; + params.ssbo_sizes_ptr = num_ssbos_ptr; + params.image = image; + params.coro = &coro_info; + params.tcs_iface = &tcs_iface.base; + + lp_build_nir_soa(variant->gallivm, + llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, + &params, NULL); + + lp_build_mask_end(&mask); + + lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true); + LLVMPositionBuilderAtEnd(builder, clean_block); + + lp_build_coro_free_mem(gallivm, coro_id, 
coro_hdl); + + LLVMBuildBr(builder, sus_block); + LLVMPositionBuilderAtEnd(builder, sus_block); + + lp_build_coro_end(gallivm, coro_hdl); + LLVMBuildRet(builder, coro_hdl); + } + + sampler->destroy(sampler); + image->destroy(image); + gallivm_verify_function(gallivm, variant_func); + gallivm_verify_function(gallivm, variant_coro); +} + +struct draw_tcs_llvm_variant * +draw_tcs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_outputs, + const struct draw_tcs_llvm_variant_key *key) +{ + struct draw_tcs_llvm_variant *variant; + struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader); + char module_name[64]; + + variant = MALLOC(sizeof *variant + + shader->variant_key_size - sizeof variant->key); + if (!variant) + return NULL; + + variant->llvm = llvm; + variant->shader = shader; + + snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u", + variant->shader->variants_cached); + + variant->gallivm = gallivm_create(module_name, llvm->context); + + create_tcs_jit_types(variant); + + memcpy(&variant->key, key, shader->variant_key_size); + + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr); + draw_tcs_llvm_dump_variant_key(&variant->key); + } + + draw_tcs_llvm_generate(llvm, variant); + + gallivm_compile_module(variant->gallivm); + + variant->jit_func = (draw_tcs_jit_func) + gallivm_jit_function(variant->gallivm, variant->function); + + gallivm_free_ir(variant->gallivm); + + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + + return variant; +} + +void +draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + debug_printf("Deleting TCS variant: %u tcs variants,\t%u total 
variants\n", + variant->shader->variants_cached, llvm->nr_tcs_variants); + } + + gallivm_destroy(variant->gallivm); + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_tcs_variants--; + FREE(variant); +} + +struct draw_tcs_llvm_variant_key * +draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store) +{ + unsigned i; + struct draw_tcs_llvm_variant_key *key; + struct draw_sampler_static_state *draw_sampler; + struct draw_image_static_state *draw_image; + + key = (struct draw_tcs_llvm_variant_key *)store; + + memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0])); + + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. + */ + key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key->nr_sampler_views = + llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + } + else { + key->nr_sampler_views = key->nr_samplers; + } + + key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1; + + draw_sampler = key->samplers; + + memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); + + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, + llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]); + } + for (i = 0 ; i < key->nr_sampler_views; i++) { + lp_sampler_static_texture_state(&draw_sampler[i].texture_state, + llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]); + } + + draw_image = draw_tcs_llvm_variant_key_images(key); + memset(draw_image, 0, + key->nr_images * sizeof *draw_image); + for (i = 0; i < key->nr_images; i++) { + lp_sampler_static_texture_state_image(&draw_image[i].image_state, + 
llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]); + } + return key; +} + +void +draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key) +{ + unsigned i; + struct draw_sampler_static_state *sampler = key->samplers; + struct draw_image_static_state *image = draw_tcs_llvm_variant_key_images(key); + for (i = 0 ; i < key->nr_sampler_views; i++) { + debug_printf("sampler[%i].src_format = %s\n", i, + util_format_name(sampler[i].texture_state.format)); + } + + for (i = 0 ; i < key->nr_images; i++) + debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format)); + +} + +static void +create_tes_jit_types(struct draw_tes_llvm_variant *var) +{ + struct gallivm_state *gallivm = var->gallivm; + LLVMTypeRef texture_type, sampler_type, image_type, context_type; + + texture_type = create_jit_texture_type(gallivm, "texture"); + sampler_type = create_jit_sampler_type(gallivm, "sampler"); + image_type = create_jit_image_type(gallivm, "image"); + + context_type = create_tes_jit_context_type(gallivm, + 0, + texture_type, sampler_type, + image_type, + "draw_tes_jit_context"); + var->context_ptr_type = LLVMPointerType(context_type, 0); + + var->input_array_type = create_tes_jit_input_type(gallivm); +} + +static LLVMTypeRef +get_tes_context_ptr_type(struct draw_tes_llvm_variant *variant) +{ + if (!variant->context_ptr_type) + create_tes_jit_types(variant); + return variant->context_ptr_type; +} + +static LLVMValueRef +generate_tes_mask_value(struct draw_tes_llvm_variant *variant, + struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type mask_type = lp_int_type(tes_type); + LLVMValueRef num_prims; + LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0); + unsigned i; + + num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit); + for (i = 0; i < tes_type.length; i++) { + 
LLVMValueRef idx = lp_build_const_int32(gallivm, i); + mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, ""); + } + mask_val = lp_build_compare(gallivm, mask_type, + PIPE_FUNC_GREATER, num_prims, mask_val); + + return mask_val; +} + +static LLVMValueRef +draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface, + struct lp_build_context *bld, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index) +{ + const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface); + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[3]; + LLVMValueRef res; + struct lp_type type = bld->type; + + if (is_vindex_indirect || is_aindex_indirect) { + int i; + + res = bld->zero; + + for (i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef vert_chan_index = vertex_index; + LLVMValueRef attr_chan_index = attrib_index; + LLVMValueRef channel_vec; + + if (is_vindex_indirect) { + vert_chan_index = LLVMBuildExtractElement(builder, + vertex_index, idx, ""); + } + if (is_aindex_indirect) { + attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + + indices[0] = vert_chan_index; + indices[1] = attr_chan_index; + indices[2] = swizzle_index; + + channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, ""); + channel_vec = LLVMBuildLoad(builder, channel_vec, ""); + + res = LLVMBuildInsertElement(builder, res, channel_vec, idx, ""); + } + } else { + indices[0] = vertex_index; + indices[1] = attrib_index; + indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, tes->input, indices, 3, ""); + res = LLVMBuildLoad(builder, res, ""); + res = lp_build_broadcast_scalar(bld, res); + } + return res; +} + +static LLVMValueRef +draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface 
*tes_iface, + struct lp_build_context *bld, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index) +{ + const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface); + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[3]; + LLVMValueRef res; + struct lp_type type = bld->type; + + if (is_aindex_indirect) { + int i; + + res = bld->zero; + + for (i = 0; i < type.length; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + LLVMValueRef attr_chan_index = attrib_index; + LLVMValueRef channel_vec; + + if (is_aindex_indirect) { + attr_chan_index = LLVMBuildExtractElement(builder, + attrib_index, idx, ""); + } + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = attr_chan_index; + indices[2] = swizzle_index; + + channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, ""); + channel_vec = LLVMBuildLoad(builder, channel_vec, ""); + + res = LLVMBuildInsertElement(builder, res, channel_vec, idx, ""); + } + } else { + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = attrib_index; + indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, tes->input, indices, 3, ""); + res = LLVMBuildLoad(builder, res, ""); + res = lp_build_broadcast_scalar(bld, res); + } + return res; +} + +static void +draw_tes_llvm_generate(struct draw_llvm *llvm, + struct draw_tes_llvm_variant *variant) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); + LLVMTypeRef flt_type = LLVMFloatTypeInContext(context); + LLVMTypeRef arg_types[9]; + LLVMTypeRef func_type; + LLVMValueRef variant_func; + LLVMValueRef context_ptr; + LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord; + LLVMValueRef tess_inner, tess_outer, prim_id; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef mask_val; + struct lp_build_context bld, bldvec; + 
struct lp_build_sampler_soa *sampler = 0; + struct lp_build_image_soa *image = NULL; + struct lp_bld_tgsi_system_values system_values; + char func_name[64]; + unsigned i; + struct draw_tes_llvm_iface tes_iface; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + struct lp_build_mask_context mask; + LLVMValueRef consts_ptr, num_consts_ptr; + LLVMValueRef ssbos_ptr, num_ssbos_ptr; + LLVMValueRef step; + struct lp_type tes_type; + unsigned vector_length = variant->shader->base.vector_length; + + memset(&system_values, 0, sizeof(system_values)); + memset(&outputs, 0, sizeof(outputs)); + + snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant%u", + variant->shader->variants_cached); + + arg_types[0] = get_tes_context_ptr_type(variant); /* context */ + arg_types[1] = variant->input_array_type; /* input */ + arg_types[2] = variant->vertex_header_ptr_type; + arg_types[3] = int32_type; + arg_types[4] = int32_type; + arg_types[5] = LLVMPointerType(flt_type, 0); + arg_types[6] = LLVMPointerType(flt_type, 0); + arg_types[7] = LLVMPointerType(LLVMArrayType(flt_type, 4), 0); + arg_types[8] = LLVMPointerType(LLVMArrayType(flt_type, 2), 0); + + func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0); + variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); + + variant->function = variant_func; + LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); + + for (i = 0; i < ARRAY_SIZE(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + + context_ptr = LLVMGetParam(variant_func, 0); + input_array = LLVMGetParam(variant_func, 1); + io_ptr = LLVMGetParam(variant_func, 2); + prim_id = LLVMGetParam(variant_func, 3); + num_tess_coord = LLVMGetParam(variant_func, 4); + tess_coord[0] = LLVMGetParam(variant_func, 5); + tess_coord[1] = LLVMGetParam(variant_func, 6); + tess_outer = LLVMGetParam(variant_func, 7); + tess_inner = 
LLVMGetParam(variant_func, 8); + + lp_build_name(context_ptr, "context"); + lp_build_name(input_array, "input"); + lp_build_name(io_ptr, "io"); + lp_build_name(prim_id, "prim_id"); + lp_build_name(num_tess_coord, "num_tess_coord"); + lp_build_name(tess_coord[0], "tess_coord[0]"); + lp_build_name(tess_coord[1], "tess_coord[1]"); + lp_build_name(tess_outer, "tess_outer"); + lp_build_name(tess_inner, "tess_inner"); + + tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input; + tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input; + tes_iface.input = input_array; + tes_iface.variant = variant; + + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); + builder = gallivm->builder; + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, gallivm, lp_type_int(32)); + + memset(&tes_type, 0, sizeof tes_type); + tes_type.floating = TRUE; /* floating point values */ + tes_type.sign = TRUE; /* values are signed */ + tes_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + tes_type.width = 32; /* 32-bit float */ + tes_type.length = vector_length; + + lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type)); + consts_ptr = draw_tes_jit_context_constants(variant->gallivm, context_ptr); + num_consts_ptr = + draw_tes_jit_context_num_constants(variant->gallivm, context_ptr); + + ssbos_ptr = draw_tes_jit_context_ssbos(variant->gallivm, context_ptr); + num_ssbos_ptr = + draw_tes_jit_context_num_ssbos(variant->gallivm, context_ptr); + sampler = draw_llvm_sampler_soa_create(variant->key.samplers); + image = draw_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key)); + step = lp_build_const_int32(gallivm, vector_length); + + system_values.tess_outer = LLVMBuildLoad(builder, tess_outer, ""); + system_values.tess_inner = LLVMBuildLoad(builder, tess_inner, ""); + + system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id); + struct lp_build_loop_state lp_loop; + 
lp_build_loop_begin(&lp_loop, gallivm, bld.zero); + { + LLVMValueRef io; + + io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, ""); + mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter); + lp_build_mask_begin(&mask, gallivm, tes_type, mask_val); + + system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3)); + for (i = 0; i < 3; i++) { + LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length)); + for (unsigned j = 0; j < vector_length; j++) { + LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), ""); + LLVMValueRef tc_val; + if (i == 2) { + if (variant->shader->base.prim_mode == PIPE_PRIM_TRIANGLES) { + tc_val = lp_build_const_float(gallivm, 1.0); + tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[0], idx), ""); + tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[1], idx), ""); + } else + tc_val = lp_build_const_float(gallivm, 0.0); + } else + tc_val = lp_build_pointer_get(builder, tess_coord[i], idx); + + tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), ""); + } + system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, ""); + } + + struct lp_build_tgsi_params params; + memset(&params, 0, sizeof(params)); + + params.type = tes_type; + params.mask = &mask; + params.consts_ptr = consts_ptr; + params.const_sizes_ptr = num_consts_ptr; + params.system_values = &system_values; + params.context_ptr = context_ptr; + params.sampler = sampler; + params.info = &llvm->draw->tes.tess_eval_shader->info; + params.ssbo_ptr = ssbos_ptr; + params.ssbo_sizes_ptr = num_ssbos_ptr; + params.image = image; + params.tes_iface = &tes_iface.base; + + lp_build_nir_soa(variant->gallivm, + llvm->draw->tes.tess_eval_shader->state.ir.nir, + &params, + outputs); + + lp_build_mask_end(&mask);
+ LLVMValueRef clipmask = lp_build_const_int_vec(gallivm, + lp_int_type(tes_type), 0); + + convert_to_aos(gallivm, io, NULL, outputs, clipmask, + params.info->num_outputs, tes_type, FALSE); + } + lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE); + sampler->destroy(sampler); + image->destroy(image); + + LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32))); + gallivm_verify_function(gallivm, variant_func); +} + +struct draw_tes_llvm_variant * +draw_tes_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_outputs, + const struct draw_tes_llvm_variant_key *key) +{ + struct draw_tes_llvm_variant *variant; + struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader); + LLVMTypeRef vertex_header; + char module_name[64]; + + variant = MALLOC(sizeof *variant + + shader->variant_key_size - sizeof variant->key); + if (!variant) + return NULL; + + variant->llvm = llvm; + variant->shader = shader; + + snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u", + variant->shader->variants_cached); + + variant->gallivm = gallivm_create(module_name, llvm->context); + + create_tes_jit_types(variant); + + memcpy(&variant->key, key, shader->variant_key_size); + + vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs); + + variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); + + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr); + draw_tes_llvm_dump_variant_key(&variant->key); + } + + draw_tes_llvm_generate(llvm, variant); + + gallivm_compile_module(variant->gallivm); + + variant->jit_func = (draw_tes_jit_func) + gallivm_jit_function(variant->gallivm, variant->function); + + gallivm_free_ir(variant->gallivm); + + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; 
+ + return variant; +} + +void +draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n", + variant->shader->variants_cached, llvm->nr_tes_variants); + } + + gallivm_destroy(variant->gallivm); + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_tes_variants--; + FREE(variant); +} + +struct draw_tes_llvm_variant_key * +draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store) +{ + unsigned i; + struct draw_tes_llvm_variant_key *key; + struct draw_sampler_static_state *draw_sampler; + struct draw_image_static_state *draw_image; + + key = (struct draw_tes_llvm_variant_key *)store; + + memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0])); + + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. 
+ */ + key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key->nr_sampler_views = + llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + } + else { + key->nr_sampler_views = key->nr_samplers; + } + + key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1; + + draw_sampler = key->samplers; + + memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); + + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, + llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]); + } + for (i = 0 ; i < key->nr_sampler_views; i++) { + lp_sampler_static_texture_state(&draw_sampler[i].texture_state, + llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]); + } + + draw_image = draw_tes_llvm_variant_key_images(key); + memset(draw_image, 0, + key->nr_images * sizeof *draw_image); + for (i = 0; i < key->nr_images; i++) { + lp_sampler_static_texture_state_image(&draw_image[i].image_state, + llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]); + } + return key; +} + +void +draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key) +{ + unsigned i; + struct draw_sampler_static_state *sampler = key->samplers; + struct draw_image_static_state *image = draw_tes_llvm_variant_key_images(key); + for (i = 0 ; i < key->nr_sampler_views; i++) { + debug_printf("sampler[%i].src_format = %s\n", i, + util_format_name(sampler[i].texture_state.format)); + } + + for (i = 0 ; i < key->nr_images; i++) + debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format)); + +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 4e7859d5dac..d376a84c073 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -44,6 +44,8 @@ struct 
draw_llvm; struct llvm_vertex_shader; struct llvm_geometry_shader; +struct llvm_tess_ctrl_shader; +struct llvm_tess_eval_shader; struct draw_jit_texture { @@ -467,7 +469,7 @@ struct draw_llvm_variant_key unsigned clip_halfz:1; unsigned bypass_viewport:1; unsigned need_edgeflags:1; - unsigned has_gs:1; + unsigned has_gs_or_tes:1; unsigned num_outputs:8; unsigned ucp_enable:PIPE_MAX_CLIP_PLANES; /* note padding here - must use memset */ @@ -495,6 +497,24 @@ struct draw_gs_llvm_variant_key /* Followed by variable number of images.*/ }; +struct draw_tcs_llvm_variant_key +{ + unsigned nr_samplers:8; + unsigned nr_sampler_views:8; + unsigned nr_images:8; + struct draw_sampler_static_state samplers[1]; + /* Followed by variable number of images.*/ +}; + +struct draw_tes_llvm_variant_key +{ + unsigned nr_samplers:8; + unsigned nr_sampler_views:8; + unsigned nr_images:8; + struct draw_sampler_static_state samplers[1]; + /* Followed by variable number of images.*/ +}; + #define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \ (sizeof(struct draw_llvm_variant_key) + \ PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state) + \ @@ -506,6 +526,16 @@ struct draw_gs_llvm_variant_key PIPE_MAX_SHADER_IMAGES * sizeof(struct draw_image_static_state) + \ PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state)) +#define DRAW_TCS_LLVM_MAX_VARIANT_KEY_SIZE \ + (sizeof(struct draw_tcs_llvm_variant_key) + \ + PIPE_MAX_SHADER_IMAGES * sizeof(struct draw_image_static_state) + \ + PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state)) + +#define DRAW_TES_LLVM_MAX_VARIANT_KEY_SIZE \ + (sizeof(struct draw_tes_llvm_variant_key) + \ + PIPE_MAX_SHADER_IMAGES * sizeof(struct draw_image_static_state) + \ + PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state)) + static inline size_t draw_llvm_variant_key_size(unsigned nr_vertex_elements, @@ -526,6 +556,21 @@ draw_gs_llvm_variant_key_size(unsigned nr_samplers, unsigned nr_images) (nr_samplers - 
1) * sizeof(struct draw_sampler_static_state)); } +static inline size_t +draw_tcs_llvm_variant_key_size(unsigned nr_samplers, unsigned nr_images) +{ + return (sizeof(struct draw_tcs_llvm_variant_key) + + (nr_images) * sizeof(struct draw_image_static_state) + + (nr_samplers - 1) * sizeof(struct draw_sampler_static_state)); +} + +static inline size_t +draw_tes_llvm_variant_key_size(unsigned nr_samplers, unsigned nr_images) +{ + return (sizeof(struct draw_tes_llvm_variant_key) + + (nr_images) * sizeof(struct draw_image_static_state) + + (nr_samplers - 1) * sizeof(struct draw_sampler_static_state)); +} static inline struct draw_sampler_static_state * draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key) @@ -550,6 +595,20 @@ draw_gs_llvm_variant_key_images(struct draw_gs_llvm_variant_key *key) &key->samplers[key->nr_samplers]; } +static inline struct draw_image_static_state * +draw_tcs_llvm_variant_key_images(struct draw_tcs_llvm_variant_key *key) +{ + return (struct draw_image_static_state *) + &key->samplers[key->nr_samplers]; +} + +static inline struct draw_image_static_state * +draw_tes_llvm_variant_key_images(struct draw_tes_llvm_variant_key *key) +{ + return (struct draw_image_static_state *) + &key->samplers[key->nr_samplers]; +} + struct draw_llvm_variant_list_item { struct draw_llvm_variant *base; @@ -562,6 +621,17 @@ struct draw_gs_llvm_variant_list_item struct draw_gs_llvm_variant_list_item *next, *prev; }; +struct draw_tcs_llvm_variant_list_item +{ + struct draw_tcs_llvm_variant *base; + struct draw_tcs_llvm_variant_list_item *next, *prev; +}; + +struct draw_tes_llvm_variant_list_item +{ + struct draw_tes_llvm_variant *base; + struct draw_tes_llvm_variant_list_item *next, *prev; +}; struct draw_llvm_variant { @@ -612,6 +682,57 @@ struct draw_gs_llvm_variant struct draw_gs_llvm_variant_key key; }; +struct draw_tcs_llvm_variant +{ + struct gallivm_state *gallivm; + + /* LLVM JIT builder types */ + LLVMTypeRef context_ptr_type; + LLVMTypeRef
input_array_type; + LLVMTypeRef output_array_type; + + LLVMValueRef context_ptr; + LLVMValueRef io_ptr; + LLVMValueRef num_prims; + LLVMValueRef function; + draw_tcs_jit_func jit_func; + + struct llvm_tess_ctrl_shader *shader; + + struct draw_llvm *llvm; + struct draw_tcs_llvm_variant_list_item list_item_global; + struct draw_tcs_llvm_variant_list_item list_item_local; + + /* key is variable-sized, must be last */ + struct draw_tcs_llvm_variant_key key; +}; + +struct draw_tes_llvm_variant +{ + struct gallivm_state *gallivm; + + /* LLVM JIT builder types */ + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef input_array_type; + LLVMTypeRef patch_input_array_type; + + LLVMValueRef context_ptr; + LLVMValueRef io_ptr; + LLVMValueRef num_prims; + LLVMValueRef function; + draw_tes_jit_func jit_func; + + struct llvm_tess_eval_shader *shader; + + struct draw_llvm *llvm; + struct draw_tes_llvm_variant_list_item list_item_global; + struct draw_tes_llvm_variant_list_item list_item_local; + + /* key is variable-sized, must be last */ + struct draw_tes_llvm_variant_key key; +}; + struct llvm_vertex_shader { struct draw_vertex_shader base; @@ -630,6 +751,23 @@ struct llvm_geometry_shader { unsigned variants_cached; }; +struct llvm_tess_ctrl_shader { + struct draw_tess_ctrl_shader base; + + unsigned variant_key_size; + struct draw_tcs_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; + +struct llvm_tess_eval_shader { + struct draw_tess_eval_shader base; + + unsigned variant_key_size; + struct draw_tes_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; struct draw_llvm { struct draw_context *draw; @@ -639,12 +777,20 @@ struct draw_llvm { struct draw_jit_context jit_context; struct draw_gs_jit_context gs_jit_context; + struct draw_tcs_jit_context tcs_jit_context; + struct draw_tes_jit_context tes_jit_context; struct draw_llvm_variant_list_item vs_variants_list; int 
nr_variants; struct draw_gs_llvm_variant_list_item gs_variants_list; int nr_gs_variants; + + struct draw_tcs_llvm_variant_list_item tcs_variants_list; + int nr_tcs_variants; + + struct draw_tes_llvm_variant_list_item tes_variants_list; + int nr_tes_variants; }; @@ -660,8 +806,17 @@ llvm_geometry_shader(struct draw_geometry_shader *gs) return (struct llvm_geometry_shader *)gs; } +static inline struct llvm_tess_ctrl_shader * +llvm_tess_ctrl_shader(struct draw_tess_ctrl_shader *tcs) +{ + return (struct llvm_tess_ctrl_shader *)tcs; +} - +static inline struct llvm_tess_eval_shader * +llvm_tess_eval_shader(struct draw_tess_eval_shader *tes) +{ + return (struct llvm_tess_eval_shader *)tes; +} struct draw_llvm * draw_llvm_create(struct draw_context *draw, LLVMContextRef llvm_context); @@ -698,6 +853,34 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store); void draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key); +struct draw_tcs_llvm_variant * +draw_tcs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_vertex_header_attribs, + const struct draw_tcs_llvm_variant_key *key); + +void +draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant); + +struct draw_tcs_llvm_variant_key * +draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store); + +void +draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key); + +struct draw_tes_llvm_variant * +draw_tes_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_vertex_header_attribs, + const struct draw_tes_llvm_variant_key *key); + +void +draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant); + +struct draw_tes_llvm_variant_key * +draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store); + +void +draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key); + struct lp_build_sampler_soa * draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state); diff --git 
a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 0fd0caab31b..e0195f455e1 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -157,7 +157,7 @@ struct draw_context unsigned prim; unsigned opt; /**< bitmask of PT_x flags */ unsigned eltSize; /* saved eltSize for flushing */ - + ubyte vertices_per_patch; boolean rebind_parameters; struct { @@ -206,12 +206,20 @@ struct draw_context unsigned vs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; const void *gs_constants[PIPE_MAX_CONSTANT_BUFFERS]; unsigned gs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; + const void *tcs_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned tcs_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; + const void *tes_constants[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned tes_constants_size[PIPE_MAX_CONSTANT_BUFFERS]; /** shader buffers (for vertex/geometry shader) */ const void *vs_ssbos[PIPE_MAX_SHADER_BUFFERS]; unsigned vs_ssbos_size[PIPE_MAX_SHADER_BUFFERS]; const void *gs_ssbos[PIPE_MAX_SHADER_BUFFERS]; unsigned gs_ssbos_size[PIPE_MAX_SHADER_BUFFERS]; + const void *tcs_ssbos[PIPE_MAX_SHADER_BUFFERS]; + unsigned tcs_ssbos_size[PIPE_MAX_SHADER_BUFFERS]; + const void *tes_ssbos[PIPE_MAX_SHADER_BUFFERS]; + unsigned tes_ssbos_size[PIPE_MAX_SHADER_BUFFERS]; /* pointer to planes */ float (*planes)[DRAW_TOTAL_CLIP_PLANES][4]; @@ -303,6 +311,34 @@ struct draw_context } gs; + /* Tessellation state */ + struct { + struct draw_tess_ctrl_shader *tess_ctrl_shader; + + /** Fields for TGSI interpreter / execution */ + struct { + struct tgsi_exec_machine *machine; + + struct tgsi_sampler *sampler; + struct tgsi_image *image; + struct tgsi_buffer *buffer; + } tgsi; + } tcs; + + struct { + struct draw_tess_eval_shader *tess_eval_shader; + uint position_output; + + /** Fields for TGSI interpreter / execution */ + struct { + struct tgsi_exec_machine *machine; + + struct tgsi_sampler *sampler; + struct tgsi_image *image; + struct tgsi_buffer 
*buffer; + } tgsi; + } tes; + /** Fragment shader state */ struct { struct draw_fragment_shader *fragment_shader; @@ -349,6 +385,8 @@ struct draw_context struct pipe_query_data_pipeline_statistics statistics; boolean collect_statistics; + float default_outer_tess_level[4]; + float default_inner_tess_level[2]; bool collect_primgen; struct draw_assembler *ia; diff --git a/src/gallium/auxiliary/draw/draw_tess.c b/src/gallium/auxiliary/draw/draw_tess.c new file mode 100644 index 00000000000..dc7282af39a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_tess.c @@ -0,0 +1,630 @@ +/************************************************************************** + * + * Copyright 2020 Red Hat. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **************************************************************************/ +#include "draw_tess.h" +#ifdef LLVM_AVAILABLE +#include "draw_llvm.h" +#endif + +#include "tessellator/p_tessellator.h" +#include "nir/nir_to_tgsi_info.h" +#include "util/u_prim.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +static inline int +draw_tes_get_input_index(int semantic, int index, + const struct tgsi_shader_info *input_info) +{ + int i; + const ubyte *input_semantic_names = input_info->output_semantic_name; + const ubyte *input_semantic_indices = input_info->output_semantic_index; + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if (input_semantic_names[i] == semantic && + input_semantic_indices[i] == index) + return i; + } + return -1; +} + +#ifdef LLVM_AVAILABLE +#define DEBUG_INPUTS 0 +static void +llvm_fetch_tcs_input(struct draw_tess_ctrl_shader *shader, + const struct draw_prim_info *input_prim_info, + unsigned prim_id, + unsigned num_vertices) +{ + const float (*input_ptr)[4]; + float (*input_data)[32][NUM_TCS_INPUTS][TGSI_NUM_CHANNELS] = &shader->tcs_input->data; + unsigned slot, i; + int vs_slot; + unsigned input_vertex_stride = shader->input_vertex_stride; + + input_ptr = shader->input; + for (i = 0; i < num_vertices; i++) { + const float (*input)[4]; + int vertex_idx = prim_id * num_vertices + i; + if (input_prim_info->linear == FALSE) + vertex_idx = input_prim_info->elts[vertex_idx]; +#if DEBUG_INPUTS + debug_printf("%d) tcs vertex index = %d (prim idx = %d)\n", + i, prim_id, 0); +#endif + input = (const float (*)[4])((const char *)input_ptr + (vertex_idx * input_vertex_stride)); + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { + vs_slot = draw_tes_get_input_index( + shader->info.input_semantic_name[slot], + shader->info.input_semantic_index[slot], + shader->input_info); + if (vs_slot < 0) { + debug_printf("VS/TCS signature mismatch!\n"); + (*input_data)[i][slot][0] = 0; + (*input_data)[i][slot][1] = 0; + 
(*input_data)[i][slot][2] = 0; + (*input_data)[i][slot][3] = 0; + } else { + (*input_data)[i][slot][0] = input[vs_slot][0]; + (*input_data)[i][slot][1] = input[vs_slot][1]; + (*input_data)[i][slot][2] = input[vs_slot][2]; + (*input_data)[i][slot][3] = input[vs_slot][3]; +#if DEBUG_INPUTS + debug_printf("\t\t%p = %f %f %f %f\n", &(*input_data)[i][slot][0], + (*input_data)[i][slot][0], + (*input_data)[i][slot][1], + (*input_data)[i][slot][2], + (*input_data)[i][slot][3]); +#endif + ++vs_slot; + } + } + } +} + +#define DEBUG_OUTPUTS 0 +static void +llvm_store_tcs_output(struct draw_tess_ctrl_shader *shader, + unsigned prim_id, + struct draw_vertex_info *output_verts, + unsigned vert_start) +{ + float (*output_ptr)[4]; + float (*output_data)[32][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = &shader->tcs_output->data; + unsigned slot, i; + unsigned num_vertices = shader->vertices_out; + + char *output = (char *)output_verts->verts->data; + output += vert_start * output_verts->stride; + + for (i = 0; i < num_vertices; i++) { + +#if DEBUG_OUTPUTS + debug_printf("%d) tcs store vertex index = %d (prim idx = %d)\n", + i, prim_id, 0); +#endif + output_ptr = (float(*)[4])(output + (i * output_verts->stride)); + + for (slot = 0; slot < shader->info.num_outputs; ++slot) { + output_ptr[slot][0] = (*output_data)[i][slot][0]; + output_ptr[slot][1] = (*output_data)[i][slot][1]; + output_ptr[slot][2] = (*output_data)[i][slot][2]; + output_ptr[slot][3] = (*output_data)[i][slot][3]; +#if DEBUG_OUTPUTS + debug_printf("\t\t%p = %f %f %f %f\n", + &output_ptr[slot][0], + output_ptr[slot][0], + output_ptr[slot][1], + output_ptr[slot][2], + output_ptr[slot][3]); +#endif + } + } +} + +static void +llvm_tcs_run(struct draw_tess_ctrl_shader *shader, uint32_t prim_id) +{ + shader->current_variant->jit_func(shader->jit_context, shader->tcs_input->data, shader->tcs_output->data, prim_id, + shader->draw->pt.vertices_per_patch); +} +#endif + +/** + * Execute tess ctrl shader. 
+ */ +int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + const struct tgsi_shader_info *input_info, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ) +{ + const float (*input)[4] = (const float (*)[4])input_verts->verts->data; + unsigned num_outputs = draw_total_tcs_outputs(shader->draw); + unsigned input_stride = input_verts->vertex_size; + unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); + unsigned num_patches = input_prim->count / shader->draw->pt.vertices_per_patch; + + output_verts->vertex_size = vertex_size; + output_verts->stride = output_verts->vertex_size; + output_verts->verts = NULL; + output_verts->count = 0; + shader->input = input; + shader->input_vertex_stride = input_stride; + shader->input_info = input_info; + + output_prims->linear = TRUE; + output_prims->start = 0; + output_prims->elts = NULL; + output_prims->count = 0; + output_prims->prim = PIPE_PRIM_PATCHES; + output_prims->flags = 0; + output_prims->primitive_lengths = NULL; + output_prims->primitive_count = 0; + +#ifdef LLVM_AVAILABLE + for (unsigned i = 0; i < num_patches; i++) { + uint32_t vert_start = output_verts->count; + + output_verts->count += shader->vertices_out; + + llvm_fetch_tcs_input(shader, input_prim, i, shader->draw->pt.vertices_per_patch); + + llvm_tcs_run(shader, i); + + uint32_t old_verts = util_align_npot(vert_start, 16); + uint32_t new_verts = util_align_npot(output_verts->count, 16); + uint32_t old_size = output_verts->vertex_size * old_verts; + uint32_t new_size = output_verts->vertex_size * new_verts; + output_verts->verts = REALLOC(output_verts->verts, old_size, new_size); + + llvm_store_tcs_output(shader, i, output_verts, vert_start); + } +#endif + + output_prims->primitive_count = 
num_patches;
+   return 0;
+}
+
+#ifdef LLVM_AVAILABLE
+#define DEBUG_INPUTS 0
+/**
+ * Gather the TES inputs of one patch into shader->tes_input.
+ *
+ * For each of the num_vertices control points of patch prim_id, the
+ * attributes produced by the previous stage are looked up by semantic
+ * name/index and copied into the slot order declared by the TES.
+ * Inputs without a matching previous-stage output are zero-filled.
+ *
+ * Note: tes_input->data has room for 32 vertices per patch and
+ * num_vertices is not range-checked here.
+ */
+static void
+llvm_fetch_tes_input(struct draw_tess_eval_shader *shader,
+                     const struct draw_prim_info *input_prim_info,
+                     unsigned prim_id,
+                     unsigned num_vertices)
+{
+   const float (*input_ptr)[4];
+   float (*input_data)[32][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = &shader->tes_input->data;
+   unsigned slot, i;
+   unsigned input_vertex_stride = shader->input_vertex_stride;
+
+   input_ptr = shader->input;
+   for (i = 0; i < num_vertices; i++) {
+      const float (*input)[4];
+      int vertex_idx = prim_id * num_vertices + i;
+
+      /* indexed input: go through the element list */
+      if (input_prim_info->linear == FALSE)
+         vertex_idx = input_prim_info->elts[vertex_idx];
+#if DEBUG_INPUTS
+      debug_printf("%d) tes vertex index = %d (prim idx = %d)\n",
+                   i, vertex_idx, prim_id);
+#endif
+      input = (const float (*)[4])((const char *)input_ptr + (vertex_idx * input_vertex_stride));
+      for (slot = 0; slot < shader->info.num_inputs; ++slot) {
+         /* the source slot is looked up afresh for every TES input */
+         int vs_slot = draw_tes_get_input_index(
+            shader->info.input_semantic_name[slot],
+            shader->info.input_semantic_index[slot],
+            shader->input_info);
+         if (vs_slot < 0) {
+            debug_printf("TCS/TES signature mismatch!\n");
+            (*input_data)[i][slot][0] = 0;
+            (*input_data)[i][slot][1] = 0;
+            (*input_data)[i][slot][2] = 0;
+            (*input_data)[i][slot][3] = 0;
+         } else {
+            (*input_data)[i][slot][0] = input[vs_slot][0];
+            (*input_data)[i][slot][1] = input[vs_slot][1];
+            (*input_data)[i][slot][2] = input[vs_slot][2];
+            (*input_data)[i][slot][3] = input[vs_slot][3];
+#if DEBUG_INPUTS
+            debug_printf("\t\t%p = %f %f %f %f\n",
+                         (const void *)&input[vs_slot][0],
+                         (*input_data)[i][slot][0],
+                         (*input_data)[i][slot][1],
+                         (*input_data)[i][slot][2],
+                         (*input_data)[i][slot][3]);
+#endif
+         }
+      }
+   }
+}
+
+/**
+ * Read the outer/inner tessellation levels for one patch.
+ *
+ * The levels are taken from the TESSOUTER/TESSINNER attributes of the
+ * first vertex of the patch.  When the previous stage does not write
+ * one of them, the draw context's default levels are used instead.
+ *
+ * NOTE(review): the first vertex is located as patch_id * num_vertices,
+ * i.e. without consulting input_prim->elts as llvm_fetch_tes_input()
+ * does for non-linear input -- confirm the input is always linear here.
+ */
+static void
+llvm_fetch_tess_factors(struct draw_tess_eval_shader *shader,
+                        unsigned patch_id,
+                        unsigned num_vertices,
+                        struct pipe_tessellation_factors *factors)
+{
+   int outer_slot = draw_tes_get_input_index(
+      TGSI_SEMANTIC_TESSOUTER, 0, shader->input_info);
+   int inner_slot = draw_tes_get_input_index(
+      TGSI_SEMANTIC_TESSINNER, 0, shader->input_info);
+   const float (*input_ptr)[4];
+   const float (*input)[4];
+   input_ptr = shader->input;
+   input = (const float (*)[4])((const char *)input_ptr + ((patch_id * num_vertices) * shader->input_vertex_stride));
+
+   if (outer_slot != -1) {
+      for (unsigned i = 0; i < 4; i++)
+         factors->outer_tf[i] = input[outer_slot][i];
+   } else {
+      for (unsigned i = 0; i < 4; i++)
+         factors->outer_tf[i] = shader->draw->default_outer_tess_level[i];
+   }
+   if (inner_slot != -1) {
+      for (unsigned i = 0; i < 2; i++)
+         factors->inner_tf[i] = input[inner_slot][i];
+   } else {
+      for (unsigned i = 0; i < 2; i++)
+         factors->inner_tf[i] = shader->draw->default_inner_tess_level[i];
+   }
+}
+
+/**
+ * Invoke the current JIT-compiled TES variant for one patch.
+ *
+ * The inputs come from shader->tes_input (see llvm_fetch_tes_input());
+ * the domain points in tess_data and the tessellation factors are passed
+ * through to the generated function, which writes to output.
+ */
+static void
+llvm_tes_run(struct draw_tess_eval_shader *shader,
+             uint32_t prim_id,
+             struct pipe_tessellator_data *tess_data,
+             struct pipe_tessellation_factors *tess_factors,
+             struct vertex_header *output)
+{
+   shader->current_variant->jit_func(shader->jit_context, shader->tes_input->data, output, prim_id,
+                                     tess_data->num_domain_points, tess_data->domain_points_u, tess_data->domain_points_v,
+                                     tess_factors->outer_tf, tess_factors->inner_tf);
+}
+#endif
+
+/**
+ * Execute tess eval shader.
+ *
+ * For every input patch the tessellation factors are fetched, the patch
+ * is tessellated, and the TES is run over the generated domain points.
+ * Vertices are appended to output_verts, 16-bit indices to the element
+ * array returned through elts_out (also stored in output_prims->elts),
+ * and one entry per generated primitive to
+ * output_prims->primitive_lengths.
+ *
+ * If growing one of the output arrays fails, processing stops and the
+ * output accumulated for the preceding patches is returned.
+ *
+ * Without LLVM_AVAILABLE no geometry is produced.
+ *
+ * \return always 0
+ */
+int draw_tess_eval_shader_run(struct draw_tess_eval_shader *shader,
+                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+                              unsigned num_input_vertices_per_patch,
+                              const struct draw_vertex_info *input_verts,
+                              const struct draw_prim_info *input_prim,
+                              const struct tgsi_shader_info *input_info,
+                              struct draw_vertex_info *output_verts,
+                              struct draw_prim_info *output_prims,
+                              ushort **elts_out)
+{
+   const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
+   unsigned num_outputs = draw_total_tes_outputs(shader->draw);
+   unsigned input_stride = input_verts->vertex_size;
+   unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
+   ushort *elts = NULL;
+   output_verts->vertex_size = vertex_size;
+   output_verts->stride = output_verts->vertex_size;
+   output_verts->count = 0;
+   output_verts->verts = NULL;
+
+   output_prims->linear = FALSE;
+   output_prims->start = 0;
+   output_prims->elts = NULL;
+   output_prims->count = 0;
+   output_prims->prim = get_tes_output_prim(shader);
+   output_prims->flags = 0;
+   output_prims->primitive_lengths = NULL;
+   output_prims->primitive_count = 0;
+
+   shader->input = input;
+   shader->input_vertex_stride = input_stride;
+   shader->input_info = input_info;
+
+#ifdef LLVM_AVAILABLE
+   struct pipe_tessellation_factors factors;
+   struct pipe_tessellator_data data = { 0 };
+   struct pipe_tessellator *ptess = p_tess_init(shader->prim_mode,
+                                                shader->spacing,
+                                                !shader->vertex_order_cw,
+                                                shader->point_mode);
+   for (unsigned i = 0; i < input_prim->primitive_count; i++) {
+      uint32_t vert_start = output_verts->count;
+      uint32_t prim_start = output_prims->primitive_count;
+      uint32_t elt_start = output_prims->count;
+
+      llvm_fetch_tess_factors(shader, i, num_input_vertices_per_patch, &factors);
+
+      /* tessellate with the factors for this primitive */
+      p_tessellate(ptess, &factors, &data);
+
+      if (data.num_domain_points == 0)
+         continue;
+
+      /*
+       * Grow all three output arrays before touching any counter, so that
+       * a failed allocation leaves the already emitted output consistent.
+       * Each result goes through a temporary: the previous buffer must
+       * not be lost if REALLOC fails.
+       */
+
+      /* room is reserved for a multiple of 4 vertices -- presumably
+       * because the 4-wide TES writes whole vectors (TODO confirm) */
+      uint32_t new_verts = vert_start + util_align_npot(data.num_domain_points, 4);
+      uint32_t old_size = output_verts->vertex_size * vert_start;
+      uint32_t new_size = output_verts->vertex_size * new_verts;
+      void *verts = REALLOC(output_verts->verts, old_size, new_size);
+      if (!verts)
+         break;
+      output_verts->verts = verts;
+
+      uint32_t elt_count = elt_start + data.num_indices;
+      ushort *new_elts = REALLOC(elts, elt_start * sizeof(uint16_t),
+                                 elt_count * sizeof(uint16_t));
+      /* a NULL result is only an error for a non-empty request */
+      if (!new_elts && elt_count)
+         break;
+      elts = new_elts;
+
+      uint32_t prim_len = u_prim_vertex_count(output_prims->prim)->min;
+      uint32_t prim_count = prim_start + data.num_indices / prim_len;
+      void *lengths = REALLOC(output_prims->primitive_lengths,
+                              prim_start * sizeof(uint32_t),
+                              prim_count * sizeof(uint32_t));
+      if (!lengths && prim_count)
+         break;
+      output_prims->primitive_lengths = lengths;
+
+      output_verts->count += data.num_domain_points;
+      output_prims->count = elt_count;
+
+      /* NOTE(review): elements are 16 bit, so vert_start + index is
+       * truncated once more than 65536 vertices have been emitted. */
+      for (unsigned j = 0; j < data.num_indices; j++)
+         elts[elt_start + j] = vert_start + data.indices[j];
+
+      llvm_fetch_tes_input(shader, input_prim, i, num_input_vertices_per_patch);
+      /* run once per primitive? */
+      char *output = (char *)output_verts->verts;
+      output += vert_start * vertex_size;
+      llvm_tes_run(shader, i, &data, &factors, (struct vertex_header *)output);
+
+      output_prims->primitive_count = prim_count;
+      for (unsigned j = prim_start; j < prim_count; j++) {
+         output_prims->primitive_lengths[j] = prim_len;
+      }
+   }
+   p_tess_destroy(ptess);
+#endif
+
+   *elts_out = elts;
+   output_prims->elts = elts;
+   return 0;
+}
+
+/**
+ * Create a draw-module tessellation control shader from a NIR shader.
+ *
+ * With LLVM enabled on the draw context, the shader is embedded in an
+ * llvm_tess_ctrl_shader and its input/output staging buffers are
+ * allocated here.
+ *
+ * \return the new shader, or NULL if an allocation fails
+ */
+struct draw_tess_ctrl_shader *
+draw_create_tess_ctrl_shader(struct draw_context *draw,
+                             const struct pipe_shader_state *state)
+{
+#ifdef LLVM_AVAILABLE
+   boolean use_llvm = draw->llvm != NULL;
+   struct llvm_tess_ctrl_shader *llvm_tcs = NULL;
+#endif
+   struct draw_tess_ctrl_shader *tcs;
+
+#ifdef LLVM_AVAILABLE
+   if (use_llvm) {
+      llvm_tcs = CALLOC_STRUCT(llvm_tess_ctrl_shader);
+
+      if (!llvm_tcs)
+         return NULL;
+
+      tcs = &llvm_tcs->base;
+
+      make_empty_list(&llvm_tcs->variants);
+   } else
+#endif
+   {
+      tcs = CALLOC_STRUCT(draw_tess_ctrl_shader);
+   }
+
+   if (!tcs)
+      return NULL;
+
+   tcs->draw = draw;
+   tcs->state = *state;
+
+   nir_tgsi_scan_shader(state->ir.nir, &tcs->info, true);
+
+   tcs->vector_length = 4;
+   tcs->vertices_out = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+#ifdef LLVM_AVAILABLE
+   if (use_llvm) {
+
+      tcs->tcs_input = align_malloc(sizeof(struct draw_tcs_inputs), 16);
+      tcs->tcs_output = align_malloc(sizeof(struct draw_tcs_outputs), 16);
+      /* do not memset a failed allocation; undo and report failure */
+      if (!tcs->tcs_input || !tcs->tcs_output) {
+         align_free(tcs->tcs_input);
+         align_free(tcs->tcs_output);
+         FREE(llvm_tcs);
+         return NULL;
+      }
+      memset(tcs->tcs_input, 0, sizeof(struct draw_tcs_inputs));
+      memset(tcs->tcs_output, 0, sizeof(struct draw_tcs_outputs));
+
+      tcs->jit_context = &draw->llvm->tcs_jit_context;
+      llvm_tcs->variant_key_size =
+         draw_tcs_llvm_variant_key_size(
+                                        MAX2(tcs->info.file_max[TGSI_FILE_SAMPLER]+1,
+                                             tcs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1),
+                                        tcs->info.file_max[TGSI_FILE_IMAGE]+1);
+   }
+#endif
+   return tcs;
+}
+
+/**
+ * Make dtcs the current tessellation control shader (NULL unbinds).
+ * Pending work is flushed first.
+ */
+void draw_bind_tess_ctrl_shader(struct draw_context *draw,
+                                struct draw_tess_ctrl_shader *dtcs)
+{
+   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+   draw->tcs.tess_ctrl_shader = dtcs;
+}
+
+/**
+ * Destroy a tessellation control shader; NULL is ignored.
+ *
+ * With LLVM enabled, all cached variants are destroyed and the
+ * input/output staging buffers allocated at creation are released.
+ */
+void draw_delete_tess_ctrl_shader(struct draw_context *draw,
+                                  struct draw_tess_ctrl_shader *dtcs)
+{
+   if (!dtcs)
+      return;
+
+#ifdef LLVM_AVAILABLE
+   if (draw->llvm) {
+      struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(dtcs);
+
+      struct draw_tcs_llvm_variant_list_item *li;
+
+      li = first_elem(&shader->variants);
+      while(!at_end(&shader->variants, li)) {
+         /* fetch the successor before the current item is destroyed */
+         struct draw_tcs_llvm_variant_list_item *next = next_elem(li);
+         draw_tcs_llvm_destroy_variant(li->base);
+         li = next;
+      }
+
+      assert(shader->variants_cached == 0);
+      /* allocated with align_malloc in draw_create_tess_ctrl_shader() */
+      align_free(dtcs->tcs_input);
+      align_free(dtcs->tcs_output);
+   }
+#endif
+   FREE(dtcs);
+}
+
+#ifdef LLVM_AVAILABLE
+/** Select the JIT variant used for subsequent TCS runs. */
+void draw_tcs_set_current_variant(struct draw_tess_ctrl_shader *shader,
+                                  struct draw_tcs_llvm_variant *variant)
+{
+   shader->current_variant = variant;
+}
+#endif
+
+/**
+ * Create a draw-module tessellation evaluation shader from a NIR shader.
+ *
+ * The tessellator parameters (primitive mode, spacing, winding, point
+ * mode) are read from the scanned shader properties and the output slots
+ * of position, viewport index and clip distances are recorded.
+ *
+ * \return the new shader, or NULL if an allocation fails
+ */
+struct draw_tess_eval_shader *
+draw_create_tess_eval_shader(struct draw_context *draw,
+                             const struct pipe_shader_state *state)
+{
+#ifdef LLVM_AVAILABLE
+   boolean use_llvm = draw->llvm != NULL;
+   struct llvm_tess_eval_shader *llvm_tes = NULL;
+#endif
+   struct draw_tess_eval_shader *tes;
+
+#ifdef LLVM_AVAILABLE
+   if (use_llvm) {
+      llvm_tes = CALLOC_STRUCT(llvm_tess_eval_shader);
+
+      if (!llvm_tes)
+         return NULL;
+
+      tes = &llvm_tes->base;
+      make_empty_list(&llvm_tes->variants);
+   } else
+#endif
+   {
+      tes = CALLOC_STRUCT(draw_tess_eval_shader);
+   }
+
+   if (!tes)
+      return NULL;
+
+   tes->draw = draw;
+   tes->state = *state;
+
+   nir_tgsi_scan_shader(state->ir.nir, &tes->info, true);
+
+   tes->prim_mode = tes->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+   tes->spacing = tes->info.properties[TGSI_PROPERTY_TES_SPACING];
+   tes->vertex_order_cw = tes->info.properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
+   tes->point_mode = tes->info.properties[TGSI_PROPERTY_TES_POINT_MODE];
+
+   tes->vector_length = 4;
+
+   /* -1: no position output found */
+   tes->position_output = -1;
+   for (unsigned i = 0; i < tes->info.num_outputs; i++) {
+      if (tes->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+          tes->info.output_semantic_index[i] == 0)
+         tes->position_output = i;
+      if (tes->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
+         tes->viewport_index_output = i;
+      if (tes->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
+         debug_assert(tes->info.output_semantic_index[i] <
+                      PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
+         tes->ccdistance_output[tes->info.output_semantic_index[i]] = i;
+      }
+   }
+
+#ifdef LLVM_AVAILABLE
+   if (use_llvm) {
+
+      tes->tes_input = align_malloc(sizeof(struct draw_tes_inputs), 16);
+      /* do not memset a failed allocation; undo and report failure */
+      if (!tes->tes_input) {
+         FREE(llvm_tes);
+         return NULL;
+      }
+      memset(tes->tes_input, 0, sizeof(struct draw_tes_inputs));
+
+      tes->jit_context = &draw->llvm->tes_jit_context;
+      llvm_tes->variant_key_size =
+         draw_tes_llvm_variant_key_size(
+                                        MAX2(tes->info.file_max[TGSI_FILE_SAMPLER]+1,
+                                             tes->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1),
+                                        tes->info.file_max[TGSI_FILE_IMAGE]+1);
+   }
+#endif
+   return tes;
+}
+
+/**
+ * Make dtes the current tessellation evaluation shader (NULL unbinds).
+ * Pending work is flushed first; when binding, the shader's position
+ * output slot is copied into the draw context.
+ */
+void draw_bind_tess_eval_shader(struct draw_context *draw,
+                                struct draw_tess_eval_shader *dtes)
+{
+   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+   if (dtes) {
+      draw->tes.tess_eval_shader = dtes;
+      draw->tes.position_output = dtes->position_output;
+   } else {
+      draw->tes.tess_eval_shader = NULL;
+   }
+}
+
+/**
+ * Destroy a tessellation evaluation shader; NULL is ignored.
+ *
+ * With LLVM enabled, all cached variants are destroyed and the input
+ * staging buffer is released.
+ */
+void draw_delete_tess_eval_shader(struct draw_context *draw,
+                                  struct draw_tess_eval_shader *dtes)
+{
+   if (!dtes)
+      return;
+
+#ifdef LLVM_AVAILABLE
+   if (draw->llvm) {
+      struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(dtes);
+      struct draw_tes_llvm_variant_list_item *li;
+
+      li = first_elem(&shader->variants);
+      while(!at_end(&shader->variants, li)) {
+         /* fetch the successor before the current item is destroyed */
+         struct draw_tes_llvm_variant_list_item *next = next_elem(li);
+         draw_tes_llvm_destroy_variant(li->base);
+         li = next;
+      }
+
+      assert(shader->variants_cached == 0);
+      align_free(dtes->tes_input);
+   }
+#endif
+   FREE(dtes);
+}
+
+#ifdef LLVM_AVAILABLE
+/** Select the JIT variant used for subsequent TES runs. */
+void draw_tes_set_current_variant(struct draw_tess_eval_shader *shader,
+                                  struct draw_tes_llvm_variant *variant)
+{
+   shader->current_variant = variant;
+}
+#endif
+
+/**
+ * Primitive type emitted by the tessellator for this TES: points in
+ * point mode, lines for the line primitive mode, triangles otherwise.
+ */
+enum pipe_prim_type get_tes_output_prim(struct draw_tess_eval_shader *shader)
+{
+   if (shader->point_mode)
+      return PIPE_PRIM_POINTS;
+   else if (shader->prim_mode == PIPE_PRIM_LINES)
+      return PIPE_PRIM_LINES;
+   else
+      return PIPE_PRIM_TRIANGLES;
+}
diff --git a/src/gallium/auxiliary/draw/draw_tess.h b/src/gallium/auxiliary/draw/draw_tess.h
index 304677ca27d..29ae0847a35 100644
--- a/src/gallium/auxiliary/draw/draw_tess.h
+++ b/src/gallium/auxiliary/draw/draw_tess.h
@@ -35,6 +35,96 @@ struct draw_context;
 #define NUM_PATCH_INPUTS 32
 #define NUM_TCS_INPUTS (PIPE_MAX_SHADER_INPUTS - NUM_PATCH_INPUTS)
 
+struct draw_tcs_inputs {
+   /* num vertices per prim */
+   float data[32][NUM_TCS_INPUTS][4];
+};
+
+struct draw_tcs_outputs {
+   /* num vertices per prim */
+   float data[32][PIPE_MAX_SHADER_INPUTS][4];
+};
+
+struct draw_tes_inputs {
+   /* num vertices per prim */
+   float data[32][PIPE_MAX_SHADER_INPUTS][4];
+};
+
+#endif
+
+struct draw_tess_ctrl_shader {
+   struct draw_context *draw;
+
+   struct pipe_shader_state state;
+   struct tgsi_shader_info info;
+
+   unsigned vector_length;
+   unsigned vertices_out;
+
+   unsigned input_vertex_stride;
+   const float (*input)[4];
+   const struct tgsi_shader_info *input_info;
+#ifdef LLVM_AVAILABLE
+   struct draw_tcs_inputs *tcs_input;
+   struct draw_tcs_outputs *tcs_output;
+   struct draw_tcs_jit_context *jit_context;
+   struct draw_tcs_llvm_variant *current_variant;
+#endif
+};
+
+struct draw_tess_eval_shader {
+   struct draw_context *draw;
+   struct pipe_shader_state state;
+   struct tgsi_shader_info info;
+
+   unsigned prim_mode;
+   unsigned spacing;
+   unsigned vertex_order_cw;
+   unsigned point_mode;
+
+   unsigned position_output;
+   unsigned viewport_index_output;
+   unsigned ccdistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT];
+   unsigned vector_length;
+
+   unsigned input_vertex_stride;
+   const float (*input)[4];
+   const struct tgsi_shader_info *input_info;
+
+#ifdef LLVM_AVAILABLE
+   struct draw_tes_inputs *tes_input;
+   struct draw_tes_jit_context *jit_context;
+   struct draw_tes_llvm_variant *current_variant;
+#endif
+};
+
+enum pipe_prim_type get_tes_output_prim(struct draw_tess_eval_shader *shader);
+
+int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader,
+                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+                              const struct draw_vertex_info *input_verts,
+                              const struct draw_prim_info *input_prim,
+                              const struct tgsi_shader_info *input_info,
+                              struct draw_vertex_info *output_verts,
+                              struct draw_prim_info *output_prims );
+
+int draw_tess_eval_shader_run(struct draw_tess_eval_shader *shader,
+                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+                              unsigned num_input_vertices_per_patch,
+                              const struct draw_vertex_info *input_verts,
+                              const struct draw_prim_info *input_prim,
+                              const struct tgsi_shader_info *input_info,
+                              struct draw_vertex_info *output_verts,
+                              struct draw_prim_info *output_prims,
+                              ushort **elts_out);
+
+#ifdef LLVM_AVAILABLE
+void draw_tcs_set_current_variant(struct draw_tess_ctrl_shader *shader,
+                                  struct draw_tcs_llvm_variant *variant);
+void draw_tes_set_current_variant(struct draw_tess_eval_shader *shader,
+                                  struct draw_tes_llvm_variant *variant);
 #endif
 
 #endif
diff --git a/src/gallium/auxiliary/meson.build b/src/gallium/auxiliary/meson.build
index d5c9bce0880..e79d2ac8eca 100644
--- a/src/gallium/auxiliary/meson.build
+++ b/src/gallium/auxiliary/meson.build
@@ -70,6 +70,8 @@ files_libgallium = files(
   'draw/draw_pt_vsplit_tmp.h',
   'draw/draw_so_emit_tmp.h',
   'draw/draw_split_tmp.h',
+  'draw/draw_tess.c',
+  'draw/draw_tess.h',
   'draw/draw_vbuf.h',
   'draw/draw_vertex.c',
   'draw/draw_vertex.h',
-- 
2.30.2