X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_state_fs.c;h=6c34ab98453a74148b6df747953bbcf62953ab57;hb=a30db60edeb7814415b04537cee8cc306ad11fd7;hp=9a43f01738ba303c22a234dd6f49cd325165d979;hpb=57a341b0a94d37e2aee5380703d171c422d8550e;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9a43f01738b..6c34ab98453 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -62,12 +62,12 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pointer.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "util/u_dump.h" #include "util/u_string.h" #include "util/simple_list.h" #include "util/u_dual_blend.h" -#include "os/os_time.h" +#include "util/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" @@ -80,10 +80,12 @@ #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_nir.h" #include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_pack.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_quad.h" @@ -101,7 +103,7 @@ #include "lp_flush.h" #include "lp_state_fs.h" #include "lp_rast.h" - +#include "nir/nir_to_tgsi_info.h" /** Fragment shader number (for debugging) */ static unsigned fs_no = 0; @@ -298,7 +300,8 @@ generate_fs_loop(struct gallivm_state *gallivm, LLVMValueRef context_ptr, LLVMValueRef num_loop, struct lp_build_interp_soa_context *interp, - struct lp_build_sampler_soa *sampler, + const struct lp_build_sampler_soa *sampler, + const struct lp_build_image_soa *image, LLVMValueRef mask_store, LLVMValueRef (*out_color)[4], LLVMValueRef depth_ptr, @@ -312,6 +315,7 @@ generate_fs_loop(struct 
gallivm_state *gallivm, LLVMTypeRef vec_type, int_vec_type; LLVMValueRef mask_ptr, mask_val; LLVMValueRef consts_ptr, num_consts_ptr; + LLVMValueRef ssbo_ptr, num_ssbo_ptr; LLVMValueRef z; LLVMValueRef z_value, s_value; LLVMValueRef z_fb, s_fb; @@ -338,16 +342,25 @@ generate_fs_loop(struct gallivm_state *gallivm, memset(&system_values, 0, sizeof(system_values)); + /* truncate then sign extend. */ + system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), ""); + system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), ""); + if (key->depth.enabled || key->stencil[0].enabled) { zs_format_desc = util_format_description(key->zsbuf_format); assert(zs_format_desc); - if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) { - if (key->alpha.enabled || + if (shader->info.base.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + else if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) { + if (shader->info.base.writes_memory) + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + else if (key->alpha.enabled || key->blend.alpha_to_coverage || - shader->info.base.uses_kill) { + shader->info.base.uses_kill || + shader->info.base.writes_samplemask) { /* With alpha test and kill, can do the depth test early * and hopefully eliminate some quads. 
But need to do a * special deferred depth write once the final mask value @@ -390,6 +403,9 @@ generate_fs_loop(struct gallivm_state *gallivm, consts_ptr = lp_jit_context_constants(gallivm, context_ptr); num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr); + ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr); + num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr); + lp_build_for_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0), LLVMIntULT, @@ -472,12 +488,30 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter); + struct lp_build_tgsi_params params; + memset(&params, 0, sizeof(params)); + + params.type = type; + params.mask = &mask; + params.consts_ptr = consts_ptr; + params.const_sizes_ptr = num_consts_ptr; + params.system_values = &system_values; + params.inputs = interp->inputs; + params.context_ptr = context_ptr; + params.thread_data_ptr = thread_data_ptr; + params.sampler = sampler; + params.info = &shader->info.base; + params.ssbo_ptr = ssbo_ptr; + params.ssbo_sizes_ptr = num_ssbo_ptr; + params.image = image; + /* Build the actual shader */ - lp_build_tgsi_soa(gallivm, tokens, type, &mask, - consts_ptr, num_consts_ptr, &system_values, - interp->inputs, - outputs, context_ptr, thread_data_ptr, - sampler, &shader->info.base, NULL); + if (shader->base.type == PIPE_SHADER_IR_TGSI) + lp_build_tgsi_soa(gallivm, tokens, &params, + outputs); + else + lp_build_nir_soa(gallivm, shader->base.ir.nir, &params, + outputs); /* Alpha test */ if (key->alpha.enabled) { @@ -516,6 +550,25 @@ generate_fs_loop(struct gallivm_state *gallivm, } } + if (shader->info.base.writes_samplemask) { + int smaski = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_SAMPLEMASK, + 0); + LLVMValueRef smask; + struct lp_build_context smask_bld; + lp_build_context_init(&smask_bld, gallivm, int_type); + + assert(smaski >= 0); + smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask"); 
+ /* + * Pixel is alive according to the first sample in the mask. + */ + smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, ""); + smask = lp_build_and(&smask_bld, smask, smask_bld.one); + smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero); + lp_build_mask_update(&mask, smask); + } + /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { int pos0 = find_output_by_semantic(&shader->info.base, @@ -1355,7 +1408,7 @@ convert_to_blend_type(struct gallivm_state *gallivm, for (j = 0; j < src_fmt->nr_channels; ++j) { unsigned mask = 0; unsigned sa = src_fmt->channel[j].shift; -#ifdef PIPE_ARCH_LITTLE_ENDIAN +#if UTIL_ARCH_LITTLE_ENDIAN unsigned from_lsb = j; #else unsigned from_lsb = src_fmt->nr_channels - j - 1; @@ -1537,7 +1590,8 @@ convert_from_blend_type(struct gallivm_state *gallivm, for (j = 0; j < src_fmt->nr_channels; ++j) { unsigned mask = 0; unsigned sa = src_fmt->channel[j].shift; -#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned sz_a = src_fmt->channel[j].size; +#if UTIL_ARCH_LITTLE_ENDIAN unsigned from_lsb = j; #else unsigned from_lsb = src_fmt->nr_channels - j - 1; @@ -1565,6 +1619,10 @@ convert_from_blend_type(struct gallivm_state *gallivm, if (src_type.norm) { chans[j] = scale_bits(gallivm, blend_type.width, src_fmt->channel[j].size, chans[j], src_type); + } else if (!src_type.floating && sz_a < blend_type.width) { + LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1); + LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, ""); + chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], ""); } /* Insert bits */ @@ -2219,7 +2277,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, if (dst_count > src_count) { if ((dst_type.width == 8 || dst_type.width == 16) && - util_is_power_of_two(dst_type.length) && + util_is_power_of_two_or_zero(dst_type.length) && dst_type.length * dst_type.width < 128) { /* * Never try to load values as 4xi8 which we will then @@ -2382,14 
+2440,14 @@ generate_fragment(struct llvmpipe_context *lp, unsigned partial_mask) { struct gallivm_state *gallivm = variant->gallivm; - const struct lp_fragment_shader_variant_key *key = &variant->key; + struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[64]; struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; LLVMTypeRef blend_vec_type; - LLVMTypeRef arg_types[13]; + LLVMTypeRef arg_types[15]; LLVMTypeRef func_type; LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); @@ -2401,13 +2459,16 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef dady_ptr; LLVMValueRef color_ptr_ptr; LLVMValueRef stride_ptr; + LLVMValueRef color_sample_stride_ptr; LLVMValueRef depth_ptr; LLVMValueRef depth_stride; + LLVMValueRef depth_sample_stride; LLVMValueRef mask_input; LLVMValueRef thread_data_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; + struct lp_build_image_soa *image; struct lp_build_interp_soa_context interp; LLVMValueRef fs_mask[16 / 4]; LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; @@ -2465,8 +2526,8 @@ generate_fragment(struct llvmpipe_context *lp, blend_vec_type = lp_build_vec_type(gallivm, blend_type); - util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", - shader->no, variant->no, partial_mask ? "partial" : "whole"); + snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", + shader->no, variant->no, partial_mask ? 
"partial" : "whole"); arg_types[0] = variant->jit_context_ptr_type; /* context */ arg_types[1] = int32_type; /* x */ @@ -2481,6 +2542,8 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */ arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */ arg_types[12] = int32_type; /* depth_stride */ + arg_types[13] = LLVMPointerType(int32_type, 0); /* color sample strides */ + arg_types[14] = int32_type; /* depth sample stride */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), arg_types, ARRAY_SIZE(arg_types), 0); @@ -2510,6 +2573,8 @@ generate_fragment(struct llvmpipe_context *lp, thread_data_ptr = LLVMGetParam(function, 10); stride_ptr = LLVMGetParam(function, 11); depth_stride = LLVMGetParam(function, 12); + color_sample_stride_ptr = LLVMGetParam(function, 13); + depth_sample_stride = LLVMGetParam(function, 14); lp_build_name(context_ptr, "context"); lp_build_name(x, "x"); @@ -2523,6 +2588,8 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(stride_ptr, "stride_ptr"); lp_build_name(depth_stride, "depth_stride"); + lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr"); + lp_build_name(depth_sample_stride, "depth_sample_stride"); /* * Function body @@ -2533,8 +2600,28 @@ generate_fragment(struct llvmpipe_context *lp, assert(builder); LLVMPositionBuilderAtEnd(builder, block); + /* + * Must not count ps invocations if there's a null shader. + * (It would be ok to count with null shader if there's d/s tests, + * but only if there's d/s buffers too, which is different + * to implicit rasterization disable which must not depend + * on the d/s buffers.) + * Could use popcount on mask, but pixel accuracy is not required. + * Could disable if there's no stats query, but maybe not worth it. 
+ */ + if (shader->info.base.num_instructions > 1) { + LLVMValueRef invocs, val; + invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr); + val = LLVMBuildLoad(builder, invocs, ""); + val = LLVMBuildAdd(builder, val, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0), + "invoc_count"); + LLVMBuildStore(builder, val, invocs); + } + /* code generated texture sampling */ - sampler = lp_llvm_sampler_soa_create(key->state); + sampler = lp_llvm_sampler_soa_create(key->samplers); + image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key)); num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ /* for 1d resources only run "upper half" of stamp */ @@ -2589,6 +2676,7 @@ generate_fragment(struct llvmpipe_context *lp, num_loop, &interp, sampler, + image, mask_store, /* output */ color_store, depth_ptr, @@ -2623,7 +2711,7 @@ generate_fragment(struct llvmpipe_context *lp, } sampler->destroy(sampler); - + image->destroy(image); /* Loop over color outputs / color buffers to do blending. 
*/ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { @@ -2663,7 +2751,7 @@ generate_fragment(struct llvmpipe_context *lp, static void -dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) +dump_fs_variant_key(struct lp_fragment_shader_variant_key *key) { unsigned i; @@ -2718,7 +2806,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf("blend.alpha_to_coverage is enabled\n"); } for (i = 0; i < key->nr_samplers; ++i) { - const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state; + const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state; debug_printf("sampler[%u] = \n", i); debug_printf(" .wrap = %s %s %s\n", util_str_tex_wrap(sampler->wrap_s, TRUE), @@ -2739,7 +2827,7 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod); } for (i = 0; i < key->nr_sampler_views; ++i) { - const struct lp_static_texture_state *texture = &key->state[i].texture_state; + const struct lp_static_texture_state *texture = &key->samplers[i].texture_state; debug_printf("texture[%u] = \n", i); debug_printf(" .format = %s\n", util_format_name(texture->format)); @@ -2752,15 +2840,33 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) texture->pot_height, texture->pot_depth); } + struct lp_image_static_state *images = lp_fs_variant_key_images(key); + for (i = 0; i < key->nr_images; ++i) { + const struct lp_static_texture_state *image = &images[i].image_state; + debug_printf("image[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(image->format)); + debug_printf(" .target = %s\n", + util_str_tex_target(image->target, TRUE)); + debug_printf(" .level_zero_only = %u\n", + image->level_zero_only); + debug_printf(" .pot = %u %u %u\n", + image->pot_width, + image->pot_height, + image->pot_depth); + } } void -lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) 
+lp_debug_fs_variant(struct lp_fragment_shader_variant *variant) { - debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", + debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", variant->shader->no, variant->no); - tgsi_dump(variant->shader->base.tokens, 0); + if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) + tgsi_dump(variant->shader->base.tokens, 0); + else + nir_print_shader(variant->shader->base.ir.nir, stderr); dump_fs_variant_key(&variant->key); debug_printf("variant->opaque = %u\n", variant->opaque); debug_printf("\n"); @@ -2777,16 +2883,17 @@ generate_variant(struct llvmpipe_context *lp, const struct lp_fragment_shader_variant_key *key) { struct lp_fragment_shader_variant *variant; - const struct util_format_description *cbuf0_format_desc; + const struct util_format_description *cbuf0_format_desc = NULL; boolean fullcolormask; char module_name[64]; - variant = CALLOC_STRUCT(lp_fragment_shader_variant); + variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key); if (!variant) return NULL; - util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u", - shader->no, shader->variants_created); + memset(variant, 0, sizeof(*variant)); + snprintf(module_name, sizeof(module_name), "fs%u_variant%u", + shader->no, shader->variants_created); variant->gallivm = gallivm_create(module_name, lp->context); if (!variant->gallivm) { @@ -2818,16 +2925,10 @@ generate_variant(struct llvmpipe_context *lp, !key->alpha.enabled && !key->blend.alpha_to_coverage && !key->depth.enabled && - !shader->info.base.uses_kill + !shader->info.base.uses_kill && + !shader->info.base.writes_samplemask ? 
TRUE : FALSE; - if ((shader->info.base.num_tokens <= 1) && - !key->depth.enabled && !key->stencil[0].enabled) { - variant->ps_inv_multiplier = 0; - } else { - variant->ps_inv_multiplier = 1; - } - if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { lp_debug_fs_variant(variant); } @@ -2880,6 +2981,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, struct lp_fragment_shader *shader; int nr_samplers; int nr_sampler_views; + int nr_images; int i; shader = CALLOC_STRUCT(lp_fragment_shader); @@ -2889,11 +2991,17 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, shader->no = fs_no++; make_empty_list(&shader->variants); - /* get/save the summary info for this shader */ - lp_build_tgsi_info(templ->tokens, &shader->info); + shader->base.type = templ->type; + if (templ->type == PIPE_SHADER_IR_TGSI) { + /* get/save the summary info for this shader */ + lp_build_tgsi_info(templ->tokens, &shader->info); - /* we need to keep a local copy of the tokens */ - shader->base.tokens = tgsi_dup_tokens(templ->tokens); + /* we need to keep a local copy of the tokens */ + shader->base.tokens = tgsi_dup_tokens(templ->tokens); + } else { + shader->base.ir.nir = templ->ir.nir; + nir_tgsi_scan_shader(templ->ir.nir, &shader->info.base, true); + } shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ); if (shader->draw_data == NULL) { @@ -2904,9 +3012,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; - - shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, - state[MAX2(nr_samplers, nr_sampler_views)]); + nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1; + shader->variant_key_size = lp_fs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images); for (i = 0; i < shader->info.base.num_inputs; i++) { shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; @@ 
-2972,14 +3079,14 @@ static void llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - - if (llvmpipe->fs == fs) + struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs; + if (llvmpipe->fs == lp_fs) return; - llvmpipe->fs = (struct lp_fragment_shader *) fs; - draw_bind_fragment_shader(llvmpipe->draw, - (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + (lp_fs ? lp_fs->draw_data : NULL)); + + llvmpipe->fs = lp_fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -2989,7 +3096,7 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) * Remove shader variant from two lists: the shader's variant list * and the context's variant list. */ -void +static void llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, struct lp_fragment_shader_variant *variant) { @@ -3044,6 +3151,8 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) /* Delete draw module's data */ draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + if (shader->base.ir.nir) + ralloc_free(shader->base.ir.nir); assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); @@ -3073,7 +3182,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if (shader == PIPE_SHADER_VERTEX || - shader == PIPE_SHADER_GEOMETRY) { + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { /* Pass the constants to the 'draw' module */ const unsigned size = cb ? 
cb->buffer_size : 0; const ubyte *data; @@ -3094,15 +3205,78 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_set_mapped_constant_buffer(llvmpipe->draw, shader, index, data, size); } - else { + else if (shader == PIPE_SHADER_COMPUTE) + llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS; + else llvmpipe->dirty |= LP_NEW_FS_CONSTANTS; - } if (cb && cb->user_buffer) { pipe_resource_reference(&constants, NULL); } } +static void +llvmpipe_set_shader_buffers(struct pipe_context *pipe, + enum pipe_shader_type shader, unsigned start_slot, + unsigned count, const struct pipe_shader_buffer *buffers, + unsigned writable_bitmask) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i, idx; + for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { + const struct pipe_shader_buffer *buffer = buffers ? &buffers[idx] : NULL; + + util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer); + + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { + const unsigned size = buffer ? buffer->buffer_size : 0; + const ubyte *data = NULL; + if (buffer && buffer->buffer) + data = (ubyte *) llvmpipe_resource_data(buffer->buffer); + if (data) + data += buffer->buffer_offset; + draw_set_mapped_shader_buffer(llvmpipe->draw, shader, + i, data, size); + } else if (shader == PIPE_SHADER_COMPUTE) { + llvmpipe->cs_dirty |= LP_CSNEW_SSBOS; + } else if (shader == PIPE_SHADER_FRAGMENT) { + llvmpipe->dirty |= LP_NEW_FS_SSBOS; + } + } +} + +static void +llvmpipe_set_shader_images(struct pipe_context *pipe, + enum pipe_shader_type shader, unsigned start_slot, + unsigned count, const struct pipe_image_view *images) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i, idx; + + draw_flush(llvmpipe->draw); + for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) { + const struct pipe_image_view *image = images ? 
&images[idx] : NULL; + + util_copy_image_view(&llvmpipe->images[shader][i], image); + } + + llvmpipe->num_images[shader] = start_slot + count; + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_TESS_CTRL || + shader == PIPE_SHADER_TESS_EVAL) { + draw_set_images(llvmpipe->draw, + shader, + llvmpipe->images[shader], + start_slot + count); + } else if (shader == PIPE_SHADER_COMPUTE) + llvmpipe->cs_dirty |= LP_CSNEW_IMAGES; + else + llvmpipe->dirty |= LP_NEW_FS_IMAGES; +} /** * Return the blend factor equivalent to a destination alpha of one. @@ -3133,14 +3307,17 @@ force_dst_alpha_one(unsigned factor, boolean clamped_zero) * TODO: there is actually no reason to tie this to context state -- the * generated code could be cached globally in the screen. */ -static void +static struct lp_fragment_shader_variant_key * make_variant_key(struct llvmpipe_context *lp, struct lp_fragment_shader *shader, - struct lp_fragment_shader_variant_key *key) + char *store) { unsigned i; + struct lp_fragment_shader_variant_key *key; - memset(key, 0, shader->variant_key_size); + key = (struct lp_fragment_shader_variant_key *)store; + + memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1])); if (lp->framebuffer.zsbuf) { enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format; @@ -3178,7 +3355,7 @@ make_variant_key(struct llvmpipe_context *lp, if (lp->rasterizer->clip_halfz) { key->depth_clamp = 1; } else { - key->depth_clamp = (lp->rasterizer->depth_clip == 0) ? 1 : 0; + key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 
1 : 0; } /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */ @@ -3192,7 +3369,7 @@ make_variant_key(struct llvmpipe_context *lp, /* alpha.ref_value is passed in jit_context */ key->flatshade = lp->rasterizer->flatshade; - if (lp->active_occlusion_queries) { + if (lp->active_occlusion_queries && !lp->queries_disabled) { key->occlusion_count = TRUE; } @@ -3286,9 +3463,15 @@ make_variant_key(struct llvmpipe_context *lp, */ key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; + struct lp_sampler_static_state *fs_sampler; + + fs_sampler = key->samplers; + + memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler); + for(i = 0; i < key->nr_samplers; ++i) { if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { - lp_sampler_static_sampler_state(&key->state[i].sampler_state, + lp_sampler_static_sampler_state(&fs_sampler[i].sampler_state, lp->samplers[PIPE_SHADER_FRAGMENT][i]); } } @@ -3301,8 +3484,13 @@ make_variant_key(struct llvmpipe_context *lp, if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; for(i = 0; i < key->nr_sampler_views; ++i) { - if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { - lp_sampler_static_texture_state(&key->state[i].texture_state, + /* + * Note sview may exceed what's representable by file_mask. + * This will still work, the only downside is that not actually + * used views may be included in the shader key. 
+ */ + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { + lp_sampler_static_texture_state(&fs_sampler[i].texture_state, lp->sampler_views[PIPE_SHADER_FRAGMENT][i]); } } @@ -3311,11 +3499,22 @@ make_variant_key(struct llvmpipe_context *lp, key->nr_sampler_views = key->nr_samplers; for(i = 0; i < key->nr_sampler_views; ++i) { if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { - lp_sampler_static_texture_state(&key->state[i].texture_state, + lp_sampler_static_texture_state(&fs_sampler[i].texture_state, lp->sampler_views[PIPE_SHADER_FRAGMENT][i]); } } } + + struct lp_image_static_state *lp_image; + lp_image = lp_fs_variant_key_images(key); + key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1; + for (i = 0; i < key->nr_images; ++i) { + if (shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) { + lp_sampler_static_texture_state_image(&lp_image[i].image_state, + &lp->images[PIPE_SHADER_FRAGMENT][i]); + } + } + return key; } @@ -3328,16 +3527,17 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp) { struct lp_fragment_shader *shader = lp->fs; - struct lp_fragment_shader_variant_key key; + struct lp_fragment_shader_variant_key *key; struct lp_fragment_shader_variant *variant = NULL; struct lp_fs_variant_list_item *li; + char store[LP_FS_MAX_VARIANT_KEY_SIZE]; - make_variant_key(lp, shader, &key); + key = make_variant_key(lp, shader, store); /* Search the variants for one which matches the key */ li = first_elem(&shader->variants); while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { + if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) { variant = li->base; break; } @@ -3409,7 +3609,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) * Generate the new variant. 
*/ t0 = os_time_get(); - variant = generate_variant(lp, shader, &key); + variant = generate_variant(lp, shader, key); t1 = os_time_get(); dt = t1 - t0; LP_COUNT_ADD(llvm_compile_time, dt); @@ -3441,19 +3641,9 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; + + llvmpipe->pipe.set_shader_buffers = llvmpipe_set_shader_buffers; + llvmpipe->pipe.set_shader_images = llvmpipe_set_shader_images; } -/* - * Rasterization is disabled if there is no pixel shader and - * both depth and stencil testing are disabled: - * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125 - */ -boolean -llvmpipe_rasterization_disabled(struct llvmpipe_context *lp) -{ - boolean null_fs = !lp->fs || lp->fs->info.base.num_tokens <= 1; - return (null_fs && - !lp->depth_stencil->depth.enabled && - !lp->depth_stencil->stencil[0].enabled); -}