#include "gallivm/lp_bld_pack.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_quad.h"
+#include "gallivm/lp_bld_gather.h"
#include "lp_bld_alpha.h"
#include "lp_bld_blend.h"
#include "lp_rast.h"
#include "nir/nir_to_tgsi_info.h"
+#include "lp_screen.h"
+#include "compiler/nir/nir_serialize.h"
+#include "util/mesa-sha1.h"
/** Fragment shader number (for debugging) */
static unsigned fs_no = 0;
+/*
+ * Forward declaration: fs_fb_fetch() below calls this helper before its
+ * definition appears later in the file.
+ *
+ * x_offset / y_offset are optional (callers may pass NULL); when given they
+ * are added to the per-slot x / y position before it is turned into an
+ * address. fb_fetch_twiddle asks for the slot -> (x, y) remapping that is
+ * applied when block_height is 2 and dst_count is 8.
+ */
+static void
+load_unswizzled_block(struct gallivm_state *gallivm,
+ LLVMValueRef base_ptr,
+ LLVMValueRef stride,
+ unsigned block_width,
+ unsigned block_height,
+ LLVMValueRef* dst,
+ struct lp_type dst_type,
+ unsigned dst_count,
+ unsigned dst_alignment,
+ LLVMValueRef x_offset,
+ LLVMValueRef y_offset,
+ bool fb_fetch_twiddle);
+/**
+ * Tells whether a format is "arithmetic": its channels are not all the same
+ * size, or at least one of them is not a whole number of bytes.
+ *
+ * Examples are R3_G3_B2 and R5_G6_B5.
+ */
+static inline boolean
+is_arithmetic_format(const struct util_format_description *format_desc)
+{
+   const unsigned first_size = format_desc->channel[0].size;
+
+   for (unsigned c = 0; c < format_desc->nr_channels; ++c) {
+      const unsigned size = format_desc->channel[c].size;
+
+      /* One irregular channel is enough to classify the whole format. */
+      if (size != first_size || (size % 8) != 0)
+         return true;
+   }
+
+   return false;
+}
+
+/**
+ * Tells whether a format needs special handling because it has to be
+ * expanded to floats for blending while still having a "natural" packed
+ * AoS -> unpacked SoA conversion.
+ *
+ * This holds for R11G11B10_FLOAT and for every format in the sRGB colorspace.
+ */
+static inline boolean
+format_expands_to_float_soa(const struct util_format_description *format_desc)
+{
+   const bool is_r11g11b10f = format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT;
+   const bool is_srgb = format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
+
+   return is_r11g11b10f || is_srgb;
+}
+
+
+/**
+ * Retrieves the type representing the memory layout for a format
+ *
+ * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
+ *
+ * \param format_desc  description of the format to examine
+ * \param type         receives the layout; it is fully overwritten, so the
+ *                     caller may pass uninitialised storage
+ */
+static inline void
+lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
+                             struct lp_type* type)
+{
+   unsigned i;
+   unsigned chan;
+
+   /*
+    * Start from an all-zero type on every path. Callers hand in
+    * uninitialised storage, so anything not assigned below must not be left
+    * indeterminate.
+    */
+   memset(type, 0, sizeof(struct lp_type));
+
+   if (format_expands_to_float_soa(format_desc)) {
+      /* just make this a uint with width of block */
+      type->width = format_desc->block.bits;
+      type->length = 1;
+      return;
+   }
+
+   /* The first non-void channel decides how the whole format is classified. */
+   for (i = 0; i < 4; i++)
+      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
+         break;
+   chan = i;
+
+   /*
+    * The search only covers indices 0..3; a format whose channels are all
+    * void would make the channel[chan] reads below run past them.
+    */
+   assert(chan < 4);
+
+   type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
+   type->fixed    = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
+   type->sign     = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
+   type->norm     = format_desc->channel[chan].normalized;
+
+   if (is_arithmetic_format(format_desc)) {
+      /* Irregular channels: treat the pixel as one scalar as wide as all
+       * channels together. */
+      type->width = 0;
+      type->length = 1;
+
+      for (i = 0; i < format_desc->nr_channels; ++i) {
+         type->width += format_desc->channel[i].size;
+      }
+   } else {
+      /* Regular channels: one vector element per channel. */
+      type->width = format_desc->channel[chan].size;
+      type->length = format_desc->nr_channels;
+   }
+}
/**
* Expand the relevant bits of mask_input to a n*4-dword mask for the
s_mask = LLVMBuildAnd(builder, s_mask, test, "");
LLVMBuildStore(builder, s_mask, s_mask_ptr);
}
+};
+
+/**
+ * Fragment-shader implementation of lp_build_fs_iface.
+ *
+ * The callbacks (fs_interp, fs_fb_fetch) receive a pointer to 'base' and cast
+ * it back to this struct, so 'base' has to remain the first member.
+ */
+struct lp_build_fs_llvm_iface {
+ /* generic interface handed to the shader translator; must stay first */
+ struct lp_build_fs_iface base;
+ /* interpolation context forwarded to lp_build_interp_soa() */
+ struct lp_build_interp_soa_context *interp;
+ /* enclosing loop; its counter selects the current part of the block */
+ struct lp_build_for_loop_state *loop_state;
+ /* mask storage forwarded to lp_build_interp_soa() */
+ LLVMValueRef mask_store;
+ /* current sample index; multiplied by the sample stride in fs_fb_fetch */
+ LLVMValueRef sample_id;
+ /* arrays indexed by color buffer number: base pointer, row stride and
+ * sample stride respectively */
+ LLVMValueRef color_ptr_ptr;
+ LLVMValueRef color_stride_ptr;
+ LLVMValueRef color_sample_stride_ptr;
+ /* variant key; fs_fb_fetch reads cbuf_format, resource_1d and multisample */
+ const struct lp_fragment_shader_variant_key *key;
+};
+
+/**
+ * lp_build_fs_iface::interp_fn callback: interpolates one channel of one
+ * input attribute for the current loop iteration.
+ *
+ * The interpolation location is chosen from the two flags; 'sample' wins
+ * over 'centroid', and with neither set the pixel center is used.
+ */
+static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
+                              struct lp_build_context *bld,
+                              unsigned attrib, unsigned chan,
+                              bool centroid, bool sample,
+                              LLVMValueRef attrib_indir,
+                              LLVMValueRef offsets[2])
+{
+   struct lp_build_fs_llvm_iface *llvm_iface = (struct lp_build_fs_llvm_iface *)iface;
+   const unsigned interp_loc = sample   ? TGSI_INTERPOLATE_LOC_SAMPLE :
+                               centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
+                                          TGSI_INTERPOLATE_LOC_CENTER;
+
+   return lp_build_interp_soa(llvm_iface->interp, bld->gallivm,
+                              llvm_iface->loop_state->counter,
+                              llvm_iface->mask_store,
+                              attrib, chan, interp_loc, attrib_indir, offsets);
+}
+
+/**
+ * lp_build_fs_iface::fb_fetch callback: reads back the current contents of
+ * color buffer 'cbuf' for the pixels handled by this loop iteration and
+ * unpacks them into 'result' (one SoA value per channel).
+ *
+ * The raw pixels are loaded with load_unswizzled_block(), gathered into one
+ * vector and converted with lp_build_unpack_rgba_soa().
+ */
+static void fs_fb_fetch(const struct lp_build_fs_iface *iface,
+ struct lp_build_context *bld,
+ unsigned cbuf,
+ LLVMValueRef result[4])
+{
+ struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
+ struct gallivm_state *gallivm = bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct lp_fragment_shader_variant_key *key = fs_iface->key;
+ /* fetch this color buffer's base pointer and stride from the per-cbuf arrays */
+ LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
+ LLVMValueRef color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_ptr_ptr, &index, 1, ""), "");
+ LLVMValueRef stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_stride_ptr, &index, 1, ""), "");
+
+ LLVMValueRef dst[4 * 4];
+ enum pipe_format cbuf_format = key->cbuf_format[cbuf];
+ const struct util_format_description* out_format_desc = util_format_description(cbuf_format);
+ struct lp_type dst_type;
+ /* one loaded value per vector lane; 1d resources use a single row,
+ * everything else two rows */
+ unsigned block_size = bld->type.length;
+ unsigned block_height = key->resource_1d ? 1 : 2;
+ unsigned block_width = block_size / block_height;
+
+ lp_mem_type_from_format_desc(out_format_desc, &dst_type);
+
+ /* blend_type is only used below to build the pointer type that color_ptr
+ * is cast to */
+ struct lp_type blend_type;
+ memset(&blend_type, 0, sizeof blend_type);
+ blend_type.floating = FALSE; /* values are integers */
+ blend_type.sign = FALSE; /* values are unsigned */
+ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
+ blend_type.width = 8; /* 8-bit ubyte values */
+ blend_type.length = 16; /* 16 elements per vector */
+
+ uint32_t dst_alignment;
+ /*
+ * Compute the alignment of the destination pointer in bytes
+ * We fetch 1-4 pixels, if the format has pot alignment then those fetches
+ * are always aligned by MIN2(16, fetch_width) except for buffers (not
+ * 1d tex but can't distinguish here) so need to stick with per-pixel
+ * alignment in this case.
+ */
+ if (key->resource_1d) {
+ dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+ }
+ else {
+ dst_alignment = dst_type.length * dst_type.width / 8;
+ }
+ /* Force power-of-two alignment by extracting only the least-significant-bit */
+ dst_alignment = 1 << (ffs(dst_alignment) - 1);
+ /*
+ * Resource base and stride pointers are aligned to 16 bytes, so that's
+ * the maximum alignment we can guarantee
+ */
+ dst_alignment = MIN2(16, dst_alignment);
+
+ LLVMTypeRef blend_vec_type = lp_build_vec_type(gallivm, blend_type);
+ color_ptr = LLVMBuildBitCast(builder, color_ptr, LLVMPointerType(blend_vec_type, 0), "");
+
+ /* with multisampling, step to the storage of the sample being shaded */
+ if (key->multisample) {
+ LLVMValueRef sample_stride = LLVMBuildLoad(builder,
+ LLVMBuildGEP(builder, fs_iface->color_sample_stride_ptr,
+ &index, 1, ""), "");
+ LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, fs_iface->sample_id, "");
+ /* NOTE(review): color_ptr has already been cast to a pointer to
+ * blend_vec_type when this single-index GEP applies sample_offset;
+ * confirm the offset is meant to be scaled by that element type rather
+ * than counted in bytes. */
+ color_ptr = LLVMBuildGEP(builder, color_ptr, &sample_offset, 1, "");
+ }
+ /* The fragment shader executes on 4x4 blocks; depending on the vector width
+ * it needs 2 (8 wide) or 4 (4 wide) loop iterations per block. Only move to
+ * the next pair of rows once the current pair is complete: after every
+ * iteration when 8 wide, after every second iteration when 4 wide. */
+ LLVMValueRef x_offset = NULL, y_offset = NULL;
+ if (!key->resource_1d) {
+ LLVMValueRef counter = fs_iface->loop_state->counter;
+
+ if (block_size == 4) {
+ /* x_offset = (counter & 1) << 1, i.e. 0 or 2; the remaining counter
+ * bits select the row pair */
+ x_offset = LLVMBuildShl(builder,
+ LLVMBuildAnd(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), ""),
+ lp_build_const_int32(gallivm, 1), "");
+ counter = LLVMBuildLShr(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), "");
+ }
+ /* each step of the (possibly shifted) counter advances by two rows */
+ y_offset = LLVMBuildMul(builder, counter, lp_build_const_int32(gallivm, 2), "");
+ }
+ load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, block_size, dst_alignment, x_offset, y_offset, true);
+
+ /* reinterpret every loaded value as i32 and gather them into one vector
+ * (presumably requires each loaded value to be 32 bits wide - TODO confirm) */
+ for (unsigned i = 0; i < block_size; i++) {
+ dst[i] = LLVMBuildBitCast(builder, dst[i], LLVMInt32TypeInContext(gallivm->context), "");
+ }
+ LLVMValueRef packed = lp_build_gather_values(gallivm, dst, block_size);
+
+ /* pure-integer RGB formats are unpacked as int / uint vectors of the same
+ * size instead of the shader's own type */
+ struct lp_type texel_type = bld->type;
+ if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+ out_format_desc->channel[0].pure_integer) {
+ if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ texel_type = lp_type_int_vec(bld->type.width, bld->type.width * bld->type.length);
+ }
+ else if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ texel_type = lp_type_uint_vec(bld->type.width, bld->type.width * bld->type.length);
+ }
+ }
+ lp_build_unpack_rgba_soa(gallivm, out_format_desc,
+ texel_type,
+ packed, result);
}
/**
LLVMValueRef depth_base_ptr,
LLVMValueRef depth_stride,
LLVMValueRef depth_sample_stride,
+ LLVMValueRef color_ptr_ptr,
+ LLVMValueRef color_stride_ptr,
+ LLVMValueRef color_sample_stride_ptr,
LLVMValueRef facing,
LLVMValueRef thread_data_ptr)
{
shader->info.base.num_instructions < 8) && 0;
const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
util_blend_state_is_dual(&key->blend, 0);
+ const bool post_depth_coverage = shader->info.base.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE];
unsigned attrib;
unsigned chan;
unsigned cbuf;
num_loop,
lp_build_const_int32(gallivm, 1));
+ LLVMValueRef sample_mask_in;
if (key->multisample) {
+ sample_mask_in = lp_build_const_int_vec(gallivm, type, 0);
/* create shader execution mask by combining all sample masks. */
for (unsigned s = 0; s < key->coverage_samples; s++) {
LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), "");
mask_val = s_mask;
else
mask_val = LLVMBuildOr(builder, s_mask, mask_val, "");
+
+ LLVMValueRef mask_in = LLVMBuildAnd(builder, s_mask, lp_build_const_int_vec(gallivm, type, (1 << s)), "");
+ sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
}
} else {
+ sample_mask_in = lp_build_const_int_vec(gallivm, type, 1);
mask_ptr = LLVMBuildGEP(builder, mask_store,
&loop_state.counter, 1, "mask_ptr");
mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+ LLVMValueRef mask_in = LLVMBuildAnd(builder, mask_val, lp_build_const_int_vec(gallivm, type, 1), "");
+ sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
LLVMValueRef s_mask_or = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, type), "cov_mask_early_depth");
LLVMBuildStore(builder, LLVMConstNull(lp_build_int_vec_type(gallivm, type)), s_mask_or);
+ /* Create storage for post depth sample mask */
+ LLVMValueRef post_depth_sample_mask_in = NULL;
+ if (post_depth_coverage)
+ post_depth_sample_mask_in = lp_build_alloca(gallivm, int_vec_type, "post_depth_sample_mask_in");
+
LLVMValueRef s_mask = NULL, s_mask_ptr = NULL;
LLVMValueRef z_sample_value_store = NULL, s_sample_value_store = NULL;
LLVMValueRef z_fb_store = NULL, s_fb_store = NULL;
struct lp_type s_type = zs_type;
if (zs_format_desc->block.bits < type.width)
z_type.width = type.width;
+ if (zs_format_desc->block.bits == 8)
+ s_type.width = type.width;
+
else if (zs_format_desc->block.bits > 32) {
z_type.width = z_type.width / 2;
s_type.width = s_type.width / 2;
tmp_s_mask_or = LLVMBuildOr(builder, tmp_s_mask_or, s_mask, "");
LLVMBuildStore(builder, tmp_s_mask_or, s_mask_or);
+ if (post_depth_coverage) {
+ LLVMValueRef mask_bit_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+ LLVMValueRef post_depth_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, "");
+ mask_bit_idx = LLVMBuildAnd(builder, s_mask, lp_build_broadcast(gallivm, int_vec_type, mask_bit_idx), "");
+ post_depth_mask_in = LLVMBuildOr(builder, post_depth_mask_in, mask_bit_idx, "");
+ LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in);
+ }
+
LLVMBuildStore(builder, s_mask, s_mask_ptr);
lp_build_for_loop_end(&sample_loop_state);
lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
lp_build_mask_update(&mask, tmp_s_mask_or);
}
+ } else {
+ if (post_depth_coverage) {
+ LLVMValueRef post_depth_mask_in = LLVMBuildAnd(builder, lp_build_mask_value(&mask), lp_build_const_int_vec(gallivm, type, 1), "");
+ LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in);
+ }
}
LLVMValueRef out_sample_mask_storage = NULL;
LLVMBuildStore(builder, LLVMConstNull(int_vec_type), out_sample_mask_storage);
}
+ if (post_depth_coverage) {
+ system_values.sample_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, "");
+ }
+ else
+ system_values.sample_mask_in = sample_mask_in;
if (key->multisample && key->min_samples > 1) {
lp_build_for_loop_begin(&sample_loop_state, gallivm,
lp_build_const_int32(gallivm, 0),
lp_build_mask_force(&mask, s_mask);
lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, sample_loop_state.counter);
system_values.sample_id = sample_loop_state.counter;
- } else
+ system_values.sample_mask_in = LLVMBuildAnd(builder, system_values.sample_mask_in,
+ lp_build_broadcast(gallivm, int_vec_type,
+ LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "")), "");
+ } else {
system_values.sample_id = lp_build_const_int32(gallivm, 0);
+ }
system_values.sample_pos = sample_pos_array;
lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, sample_loop_state.counter);
+ struct lp_build_fs_llvm_iface fs_iface = {
+ .base.interp_fn = fs_interp,
+ .base.fb_fetch = fs_fb_fetch,
+ .interp = interp,
+ .loop_state = &loop_state,
+ .sample_id = system_values.sample_id,
+ .mask_store = mask_store,
+ .color_ptr_ptr = color_ptr_ptr,
+ .color_stride_ptr = color_stride_ptr,
+ .color_sample_stride_ptr = color_sample_stride_ptr,
+ .key = key,
+ };
+
struct lp_build_tgsi_params params;
memset(¶ms, 0, sizeof(params));
params.type = type;
params.mask = &mask;
+ params.fs_iface = &fs_iface.base;
params.consts_ptr = consts_ptr;
params.const_sizes_ptr = num_consts_ptr;
params.system_values = &system_values;
LLVMValueRef* dst,
struct lp_type dst_type,
unsigned dst_count,
- unsigned dst_alignment)
+ unsigned dst_alignment,
+ LLVMValueRef x_offset,
+ LLVMValueRef y_offset,
+ bool fb_fetch_twiddle)
{
LLVMBuilderRef builder = gallivm->builder;
unsigned row_size = dst_count / block_height;
unsigned x = i % row_size;
unsigned y = i / row_size;
- LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
- LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, "");
+ if (block_height == 2 && dst_count == 8 && fb_fetch_twiddle) {
+ /* remap the raw slots into the fragment shader execution mode. */
+ /* this math took me way too long to work out, I'm sure it's overkill. */
+ x = (i & 1) + ((i >> 2) << 1);
+ y = (i & 2) >> 1;
+ }
+
+ LLVMValueRef x_val;
+ if (x_offset) {
+ x_val = lp_build_const_int32(gallivm, x);
+ if (x_offset)
+ x_val = LLVMBuildAdd(builder, x_val, x_offset, "");
+ x_val = LLVMBuildMul(builder, x_val, lp_build_const_int32(gallivm, (dst_type.width / 8) * dst_type.length), "");
+ } else
+ x_val = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
+
+ LLVMValueRef bx = x_val;
+
+ LLVMValueRef y_val = lp_build_const_int32(gallivm, y);
+ if (y_offset)
+ y_val = LLVMBuildAdd(builder, y_val, y_offset, "");
+ LLVMValueRef by = LLVMBuildMul(builder, y_val, stride, "");
LLVMValueRef gep[2];
LLVMValueRef dst_ptr;
}
-/**
- * Checks if a format description is an arithmetic format
- *
- * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
- */
-static inline boolean
-is_arithmetic_format(const struct util_format_description *format_desc)
-{
- boolean arith = false;
- unsigned i;
-
- for (i = 0; i < format_desc->nr_channels; ++i) {
- arith |= format_desc->channel[i].size != format_desc->channel[0].size;
- arith |= (format_desc->channel[i].size % 8) != 0;
- }
-
- return arith;
-}
-
-
-/**
- * Checks if this format requires special handling due to required expansion
- * to floats for blending, and furthermore has "natural" packed AoS -> unpacked
- * SoA conversion.
- */
-static inline boolean
-format_expands_to_float_soa(const struct util_format_description *format_desc)
-{
- if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
- format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- return true;
- }
- return false;
-}
-
-
-/**
- * Retrieves the type representing the memory layout for a format
- *
- * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
- */
-static inline void
-lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
- struct lp_type* type)
-{
- unsigned i;
- unsigned chan;
-
- if (format_expands_to_float_soa(format_desc)) {
- /* just make this a uint with width of block */
- type->floating = false;
- type->fixed = false;
- type->sign = false;
- type->norm = false;
- type->width = format_desc->block.bits;
- type->length = 1;
- return;
- }
-
- for (i = 0; i < 4; i++)
- if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
- break;
- chan = i;
-
- memset(type, 0, sizeof(struct lp_type));
- type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
- type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
- type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
- type->norm = format_desc->channel[chan].normalized;
-
- if (is_arithmetic_format(format_desc)) {
- type->width = 0;
- type->length = 1;
-
- for (i = 0; i < format_desc->nr_channels; ++i) {
- type->width += format_desc->channel[i].size;
- }
- } else {
- type->width = format_desc->channel[chan].size;
- type->length = format_desc->nr_channels;
- }
-}
-
/**
* Retrieves the type for a format which is usable in the blending code.
if (is_1d) {
load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
- dst, ls_type, dst_count / 4, dst_alignment);
+ dst, ls_type, dst_count / 4, dst_alignment, NULL, NULL, false);
for (i = dst_count / 4; i < dst_count; i++) {
dst[i] = lp_build_undef(gallivm, ls_type);
}
}
else {
load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
- dst, ls_type, dst_count, dst_alignment);
+ dst, ls_type, dst_count, dst_alignment, NULL, NULL, false);
}
blend_vec_type = lp_build_vec_type(gallivm, blend_type);
- snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, partial_mask ? "partial" : "whole");
+ snprintf(func_name, sizeof(func_name), "fs_variant_%s",
+ partial_mask ? "partial" : "whole");
arg_types[0] = variant->jit_context_ptr_type; /* context */
arg_types[1] = int32_type; /* x */
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
+ if (variant->gallivm->cache->data_size)
+ return;
+
context_ptr = LLVMGetParam(function, 0);
x = LLVMGetParam(function, 1);
y = LLVMGetParam(function, 2);
}
/* code generated texture sampling */
- sampler = lp_llvm_sampler_soa_create(key->samplers);
- image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
+ sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers);
+ image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key), key->nr_images);
num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
/* for 1d resources only run "upper half" of stamp */
depth_ptr,
depth_stride,
depth_sample_stride,
+ color_ptr_ptr,
+ stride_ptr,
+ color_sample_stride_ptr,
facing,
thread_data_ptr);
debug_printf("\n");
}
+/**
+ * Computes the SHA-1 digest used as the cache key for a shader variant.
+ *
+ * The digest covers the variant key (variant_key_size bytes) followed by the
+ * serialized NIR of the variant's shader.
+ *
+ * \param variant            variant whose key and shader IR are hashed
+ * \param ir_sha1_cache_key  receives the 20-byte digest
+ */
+static void
+lp_fs_get_ir_cache_key(struct lp_fragment_shader_variant *variant,
+                       unsigned char ir_sha1_cache_key[20])
+{
+   struct blob serialized = { 0 };
+   struct mesa_sha1 sha1;
+   unsigned serialized_size;
+   void *serialized_data;
+
+   /* Serialize the shader's NIR into a temporary blob. */
+   blob_init(&serialized);
+   nir_serialize(&serialized, variant->shader->base.ir.nir, true);
+   serialized_data = serialized.data;
+   serialized_size = serialized.size;
+
+   /* Hash the variant key first, then the serialized IR. */
+   _mesa_sha1_init(&sha1);
+   _mesa_sha1_update(&sha1, &variant->key, variant->shader->variant_key_size);
+   _mesa_sha1_update(&sha1, serialized_data, serialized_size);
+   _mesa_sha1_final(&sha1, ir_sha1_cache_key);
+
+   blob_finish(&serialized);
+}
/**
* Generate a new fragment shader variant from the shader code and
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key)
{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader_variant *variant;
const struct util_format_description *cbuf0_format_desc = NULL;
boolean fullcolormask;
char module_name[64];
-
+ unsigned char ir_sha1_cache_key[20];
+ struct lp_cached_code cached = { 0 };
+ bool needs_caching = false;
variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
if (!variant)
return NULL;
snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
shader->no, shader->variants_created);
- variant->gallivm = gallivm_create(module_name, lp->context);
+ pipe_reference_init(&variant->reference, 1);
+ lp_fs_reference(lp, &variant->shader, shader);
+
+ memcpy(&variant->key, key, shader->variant_key_size);
+
+ if (shader->base.ir.nir) {
+ lp_fs_get_ir_cache_key(variant, ir_sha1_cache_key);
+
+ lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
+ if (!cached.data_size)
+ needs_caching = true;
+ }
+ variant->gallivm = gallivm_create(module_name, lp->context, &cached);
if (!variant->gallivm) {
FREE(variant);
return NULL;
}
- variant->shader = shader;
variant->list_item_global.base = variant;
variant->list_item_local.base = variant;
variant->no = shader->variants_created++;
- memcpy(&variant->key, key, shader->variant_key_size);
+
/*
* Determine whether we are touching all channels in the color buffer.
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
+ if (needs_caching) {
+ lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
+ }
+
gallivm_free_ir(variant->gallivm);
return variant;
if (!shader)
return NULL;
+ pipe_reference_init(&shader->reference, 1);
shader->no = fs_no++;
make_empty_list(&shader->variants);
draw_bind_fragment_shader(llvmpipe->draw,
(lp_fs ? lp_fs->draw_data : NULL));
- llvmpipe->fs = lp_fs;
+ lp_fs_reference(llvmpipe, &llvmpipe->fs, lp_fs);
+ /* invalidate the setup link, NEW_FS will make it update */
+ lp_setup_set_fs_variant(llvmpipe->setup, NULL);
llvmpipe->dirty |= LP_NEW_FS;
}
* Remove shader variant from two lists: the shader's variant list
* and the context's variant list.
*/
-static void
-llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
- struct lp_fragment_shader_variant *variant)
+
+static
+void llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant)
{
if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
debug_printf("llvmpipe: del fs #%u var %u v created %u v cached %u "
lp->nr_fs_variants, variant->nr_instrs, lp->nr_fs_instrs);
}
- gallivm_destroy(variant->gallivm);
-
/* remove from shader's list */
remove_from_list(&variant->list_item_local);
variant->shader->variants_cached--;
remove_from_list(&variant->list_item_global);
lp->nr_fs_variants--;
lp->nr_fs_instrs -= variant->nr_instrs;
+}
+
+/**
+ * Releases a shader variant: destroys its gallivm state, resets its shader
+ * pointer through lp_fs_reference() and frees the variant itself.
+ *
+ * Unlinking the variant from the variant lists is not done here; that is
+ * llvmpipe_remove_shader_variant()'s job.
+ */
+void
+llvmpipe_destroy_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant)
+{
+ gallivm_destroy(variant->gallivm);
+
+ /* presumably drops the shader reference taken when the variant was
+ * generated - verify against lp_fs_reference() */
+ lp_fs_reference(lp, &variant->shader, NULL);
FREE(variant);
}
+/**
+ * Frees a fragment shader object: the draw module's data, the NIR (if any),
+ * the TGSI tokens and finally the shader struct itself.
+ *
+ * Asserts that no variants of the shader are still cached.
+ */
+void
+llvmpipe_destroy_fs(struct llvmpipe_context *llvmpipe,
+ struct lp_fragment_shader *shader)
+{
+ /* Delete draw module's data */
+ draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
+
+ /* the NIR is only present for shaders that carry one */
+ if (shader->base.ir.nir)
+ ralloc_free(shader->base.ir.nir);
+ /* every variant must already have been removed from the shader's list */
+ assert(shader->variants_cached == 0);
+ /* the cast drops the const qualifier so the tokens can be freed */
+ FREE((void *) shader->base.tokens);
+ FREE(shader);
+}
static void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
struct lp_fragment_shader *shader = fs;
struct lp_fs_variant_list_item *li;
- assert(fs != llvmpipe->fs);
-
- /*
- * XXX: we need to flush the context until we have some sort of reference
- * counting in fragment shaders as they may still be binned
- * Flushing alone might not sufficient we need to wait on it too.
- */
- llvmpipe_finish(pipe, __FUNCTION__);
-
/* Delete all the variants */
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
struct lp_fs_variant_list_item *next = next_elem(li);
+ struct lp_fragment_shader_variant *variant;
+ variant = li->base;
llvmpipe_remove_shader_variant(llvmpipe, li->base);
+ lp_fs_variant_reference(llvmpipe, &variant, NULL);
li = next;
}
- /* Delete draw module's data */
- draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
-
- if (shader->base.ir.nir)
- ralloc_free(shader->base.ir.nir);
- assert(shader->variants_cached == 0);
- FREE((void *) shader->base.tokens);
- FREE(shader);
+ lp_fs_reference(llvmpipe, &shader, NULL);
}
-
-
static void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
enum pipe_shader_type shader, uint index,
key->occlusion_count = TRUE;
}
- if (lp->framebuffer.nr_cbufs) {
- memcpy(&key->blend, lp->blend, sizeof key->blend);
- }
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
key->coverage_samples = 1;
key->min_samples = 1;
if (variants_to_cull ||
lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
- struct pipe_context *pipe = &lp->pipe;
-
if (gallivm_debug & GALLIVM_DEBUG_PERF) {
debug_printf("Evicting FS: %u fs variants,\t%u total variants,"
"\t%u instrs,\t%u instrs/variant\n",
lp->nr_fs_instrs / lp->nr_fs_variants);
}
- /*
- * XXX: we need to flush the context until we have some sort of
- * reference counting in fragment shaders as they may still be binned
- * Flushing alone might not be sufficient we need to wait on it too.
- */
- llvmpipe_finish(pipe, __FUNCTION__);
-
/*
* We need to re-check lp->nr_fs_variants because an arbitrarliy large
* number of shader variants (potentially all of them) could be
assert(item);
assert(item->base);
llvmpipe_remove_shader_variant(lp, item->base);
+ lp_fs_variant_reference(lp, &item->base, NULL);
}
}