#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_pointer.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_dump.h"
#include "util/u_string.h"
#include "util/simple_list.h"
#include "util/u_dual_blend.h"
-#include "os/os_time.h"
+#include "util/os_time.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_dump.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_nir.h"
#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_pack.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_quad.h"
#include "lp_flush.h"
#include "lp_state_fs.h"
#include "lp_rast.h"
-
+#include "nir/nir_to_tgsi_info.h"
/** Fragment shader number (for debugging) */
static unsigned fs_no = 0;
generate_quad_mask(struct gallivm_state *gallivm,
struct lp_type fs_type,
unsigned first_quad,
- LLVMValueRef mask_input) /* int32 */
+ unsigned sample,
+ LLVMValueRef mask_input) /* int64 */
{
LLVMBuilderRef builder = gallivm->builder;
struct lp_type mask_type;
shift = 0;
}
+ mask_input = LLVMBuildLShr(builder, mask_input, lp_build_const_int64(gallivm, 16 * sample), "");
+ mask_input = LLVMBuildTrunc(builder, mask_input,
+ i32t, "");
+ mask_input = LLVMBuildAnd(builder, mask_input, lp_build_const_int32(gallivm, 0xffff), "");
+
mask_input = LLVMBuildLShr(builder,
mask_input,
LLVMConstInt(i32t, shift, 0),
LLVMValueRef context_ptr,
LLVMValueRef num_loop,
struct lp_build_interp_soa_context *interp,
- struct lp_build_sampler_soa *sampler,
+ const struct lp_build_sampler_soa *sampler,
+ const struct lp_build_image_soa *image,
LLVMValueRef mask_store,
LLVMValueRef (*out_color)[4],
LLVMValueRef depth_ptr,
LLVMTypeRef vec_type, int_vec_type;
LLVMValueRef mask_ptr, mask_val;
LLVMValueRef consts_ptr, num_consts_ptr;
+ LLVMValueRef ssbo_ptr, num_ssbo_ptr;
LLVMValueRef z;
LLVMValueRef z_value, s_value;
LLVMValueRef z_fb, s_fb;
memset(&system_values, 0, sizeof(system_values));
+ /* truncate then sign extend. */
+ system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), "");
+ system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), "");
+
if (key->depth.enabled ||
key->stencil[0].enabled) {
zs_format_desc = util_format_description(key->zsbuf_format);
assert(zs_format_desc);
- if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
- if (key->alpha.enabled ||
+ if (shader->info.base.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ else if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
+ if (shader->info.base.writes_memory)
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else if (key->alpha.enabled ||
key->blend.alpha_to_coverage ||
- shader->info.base.uses_kill) {
+ shader->info.base.uses_kill ||
+ shader->info.base.writes_samplemask) {
/* With alpha test and kill, can do the depth test early
* and hopefully eliminate some quads. But need to do a
* special deferred depth write once the final mask value
consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr);
+ ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr);
+ num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr);
+
lp_build_for_loop_begin(&loop_state, gallivm,
lp_build_const_int32(gallivm, 0),
LLVMIntULT,
lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+ struct lp_build_tgsi_params params;
+ memset(&params, 0, sizeof(params));
+
+ params.type = type;
+ params.mask = &mask;
+ params.consts_ptr = consts_ptr;
+ params.const_sizes_ptr = num_consts_ptr;
+ params.system_values = &system_values;
+ params.inputs = interp->inputs;
+ params.context_ptr = context_ptr;
+ params.thread_data_ptr = thread_data_ptr;
+ params.sampler = sampler;
+ params.info = &shader->info.base;
+ params.ssbo_ptr = ssbo_ptr;
+ params.ssbo_sizes_ptr = num_ssbo_ptr;
+ params.image = image;
+
/* Build the actual shader */
- lp_build_tgsi_soa(gallivm, tokens, type, &mask,
- consts_ptr, num_consts_ptr, &system_values,
- interp->inputs,
- outputs, context_ptr, thread_data_ptr,
- sampler, &shader->info.base, NULL);
+ if (shader->base.type == PIPE_SHADER_IR_TGSI)
+ lp_build_tgsi_soa(gallivm, tokens, &params,
+ outputs);
+ else
+ lp_build_nir_soa(gallivm, shader->base.ir.nir, &params,
+ outputs);
/* Alpha test */
if (key->alpha.enabled) {
}
}
+ if (shader->info.base.writes_samplemask) {
+ int smaski = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_SAMPLEMASK,
+ 0);
+ LLVMValueRef smask;
+ struct lp_build_context smask_bld;
+ lp_build_context_init(&smask_bld, gallivm, int_type);
+
+ assert(smaski >= 0);
+ smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+ /*
+ * Pixel is alive according to the first sample in the mask.
+ */
+ smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, "");
+ smask = lp_build_and(&smask_bld, smask, smask_bld.one);
+ smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero);
+ lp_build_mask_update(&mask, smask);
+ }
+
/* Late Z test */
if (depth_mode & LATE_DEPTH_TEST) {
int pos0 = find_output_by_semantic(&shader->info.base,
}
} else if (twiddle) {
/* Twiddle pixels across elements of array */
+ /*
+ * XXX: we should avoid this in some cases, but would need to tell
+ * lp_build_conv to reorder (or deal with it ourselves).
+ */
lp_bld_quad_twiddle(gallivm, type, src, src_count, dst);
} else {
/* Do nothing */
}
+/*
+ * Untwiddle and transpose, much like the above.
+ * However, this is after conversion, so we get packed vectors.
+ * At this time only handle 4x16i8 rgba / 2x16i8 rg / 1x16i8 r data,
+ * the vectors will look like:
+ * r0r1r4r5r2r3r6r7r8r9r12... (albeit color channels may
+ * be swizzled here). Extending to 16bit should be trivial.
+ * Should also be extended to handle twice wide vectors with AVX2...
+ *
+ * Parameters:
+ * gallivm - supplies the LLVM builder all instructions are emitted with.
+ * type - type of each source vector; asserted to be 16 elements
+ * of 8 bits each.
+ * src - array of src_count input vectors.
+ * src_count - number of vectors; asserted to be 1, 2 or 4.
+ * dst - receives src_count result vectors, bitcast back to the
+ * 16x8 vector type. The caller in this file passes the same
+ * array for src and dst.
+ *
+ * How the three cases are handled:
+ * - The shuf table holds the index pattern 0,2,1,3,4,6,5,7, i.e. the two
+ * middle entries of every group of four are swapped.
+ * - src_count == 1: src[0] is viewed as 8x16bit and shuffled with all
+ * eight indices.
+ * - src_count == 2: each transposed vector is viewed as 4x32bit and
+ * shuffled with the first four indices.
+ * - src_count == 4: the transposed vectors are taken in pairs, viewed as
+ * 2x64bit and combined with lp_build_interleave2 (last argument 0 for
+ * the first result of the pair, 1 for the second).
+ */
+static void
+fs_twiddle_transpose(struct gallivm_state *gallivm,
+ struct lp_type type,
+ LLVMValueRef *src,
+ unsigned src_count,
+ LLVMValueRef *dst)
+{
+ unsigned i, j;
+ struct lp_type type64, type16, type32;
+ LLVMTypeRef type64_t, type8_t, type16_t, type32_t;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef tmp[4], shuf[8];
+ for (j = 0; j < 2; j++) {
+ shuf[j*4 + 0] = lp_build_const_int32(gallivm, j*4 + 0);
+ shuf[j*4 + 1] = lp_build_const_int32(gallivm, j*4 + 2);
+ shuf[j*4 + 2] = lp_build_const_int32(gallivm, j*4 + 1);
+ shuf[j*4 + 3] = lp_build_const_int32(gallivm, j*4 + 3);
+ }
+
+ assert(src_count == 4 || src_count == 2 || src_count == 1);
+ assert(type.width == 8);
+ assert(type.length == 16);
+
+ type8_t = lp_build_vec_type(gallivm, type);
+
+ type64 = type;
+ type64.length /= 8;
+ type64.width *= 8;
+ type64_t = lp_build_vec_type(gallivm, type64);
+
+ type16 = type;
+ type16.length /= 2;
+ type16.width *= 2;
+ type16_t = lp_build_vec_type(gallivm, type16);
+
+ type32 = type;
+ type32.length /= 4;
+ type32.width *= 4;
+ type32_t = lp_build_vec_type(gallivm, type32);
+
+ lp_build_transpose_aos_n(gallivm, type, src, src_count, tmp);
+
+ if (src_count == 1) {
+ /* transpose was no-op, just untwiddle:
+ * read src[0] directly, view it as 8 x 16bit and swap the two middle
+ * 16bit elements in each group of four, then view as 16 x 8bit again.
+ */
+ LLVMValueRef shuf_vec;
+ shuf_vec = LLVMConstVector(shuf, 8);
+ tmp[0] = LLVMBuildBitCast(builder, src[0], type16_t, "");
+ tmp[0] = LLVMBuildShuffleVector(builder, tmp[0], tmp[0], shuf_vec, "");
+ dst[0] = LLVMBuildBitCast(builder, tmp[0], type8_t, "");
+ } else if (src_count == 2) {
+ LLVMValueRef shuf_vec;
+ shuf_vec = LLVMConstVector(shuf, 4);
+
+ for (i = 0; i < 2; i++) {
+ tmp[i] = LLVMBuildBitCast(builder, tmp[i], type32_t, "");
+ tmp[i] = LLVMBuildShuffleVector(builder, tmp[i], tmp[i], shuf_vec, "");
+ dst[i] = LLVMBuildBitCast(builder, tmp[i], type8_t, "");
+ }
+ } else {
+ for (j = 0; j < 2; j++) {
+ LLVMValueRef lo, hi, lo2, hi2;
+ /*
+ * Note that if we only really have 3 valid channels (rgb)
+ * and we don't need alpha we could substitute an undef here
+ * for the respective channel (causing llvm to drop conversion
+ * for alpha).
+ */
+ /* we now have rgba0rgba1rgba4rgba5 etc, untwiddle:
+ * treat each pair of transposed vectors as 2 x 64bit and interleave
+ * them, producing dst[j*2] and dst[j*2 + 1].
+ */
+ lo2 = LLVMBuildBitCast(builder, tmp[j*2], type64_t, "");
+ hi2 = LLVMBuildBitCast(builder, tmp[j*2 + 1], type64_t, "");
+ lo = lp_build_interleave2(gallivm, type64, lo2, hi2, 0);
+ hi = lp_build_interleave2(gallivm, type64, lo2, hi2, 1);
+ dst[j*2] = LLVMBuildBitCast(builder, lo, type8_t, "");
+ dst[j*2 + 1] = LLVMBuildBitCast(builder, hi, type8_t, "");
+ }
+ }
+}
+
+
/**
* Load an unswizzled block of pixels from memory
*/
for (j = 0; j < src_fmt->nr_channels; ++j) {
unsigned mask = 0;
unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+#if UTIL_ARCH_LITTLE_ENDIAN
unsigned from_lsb = j;
#else
unsigned from_lsb = src_fmt->nr_channels - j - 1;
for (j = 0; j < src_fmt->nr_channels; ++j) {
unsigned mask = 0;
unsigned sa = src_fmt->channel[j].shift;
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned sz_a = src_fmt->channel[j].size;
+#if UTIL_ARCH_LITTLE_ENDIAN
unsigned from_lsb = j;
#else
unsigned from_lsb = src_fmt->nr_channels - j - 1;
/* Extract bits */
chans[j] = LLVMBuildLShr(builder,
dst[i],
- lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+ lp_build_const_int_vec(gallivm, src_type,
+ from_lsb * blend_type.width),
"");
chans[j] = LLVMBuildAnd(builder,
if (src_type.norm) {
chans[j] = scale_bits(gallivm, blend_type.width,
src_fmt->channel[j].size, chans[j], src_type);
+ } else if (!src_type.floating && sz_a < blend_type.width) {
+ LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1);
+ LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, "");
+ chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], "");
}
/* Insert bits */
/* If there is a src for each pixel broadcast the alpha across whole row */
if (src_count == block_size) {
for (i = 0; i < src_count; ++i) {
- src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+ src_alpha[i] = lp_build_broadcast(gallivm,
+ lp_build_vec_type(gallivm, row_type), src_alpha[i]);
}
} else {
unsigned pixels = block_size / src_count;
util_blend_state_is_dual(&variant->key.blend, 0);
const boolean is_1d = variant->key.resource_1d;
+ boolean twiddle_after_convert = FALSE;
unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
LLVMValueRef fpstate = 0;
}
/* If 3 channels then pad to include alpha for 4 element transpose */
- if (dst_channels == 3 && !has_alpha) {
+ if (dst_channels == 3) {
+ assert (!has_alpha);
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
if (swizzle[i] > TGSI_NUM_CHANNELS)
swizzle[i] = 3;
}
if (out_format_desc->nr_channels == 4) {
dst_channels = 4;
+ /*
+ * We use alpha from the color conversion, not separate one.
+ * We had to include it for transpose, hence it will get converted
+ * too (albeit when doing transpose after conversion, that would
+ * no longer be the case necessarily).
+ * (It works only with 4 channel dsts, e.g. rgbx formats, because
+ * otherwise we really have padding, not alpha, included.)
+ */
+ has_alpha = true;
}
}
/*
* XXX If we include that here maybe could actually use it instead of
* separate alpha for blending?
+ * (Difficult though we actually convert pad channels, not alpha.)
*/
if (dst_channels == 3 && !has_alpha) {
fs_src[i][3] = alpha;
/* We split the row_mask and row_alpha as we want 128bit interleave */
if (fs_type.length == 8) {
- src_mask[i*2 + 0] = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
- src_mask[i*2 + 1] = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+ src_mask[i*2 + 0] = lp_build_extract_range(gallivm, fs_mask[i],
+ 0, src_channels);
+ src_mask[i*2 + 1] = lp_build_extract_range(gallivm, fs_mask[i],
+ src_channels, src_channels);
src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
- src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+ src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+ src_channels, src_channels);
} else {
src_mask[i] = fs_mask[i];
src_alpha[i] = alpha;
}
if (fs_type.length == 8) {
src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
- src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+ src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+ src_channels, src_channels);
} else {
src1_alpha[i] = alpha;
}
}
}
+ /*
+ * We actually should generally do conversion first (for non-1d cases)
+ * when the blend format is 8 or 16 bits. The reason is obvious:
+ * there are 2 or 4 times fewer vectors to deal with for the interleave...
+ * Albeit for the AVX (not AVX2) case there's no benefit with 16 bit
+ * vectors (as it can do 32bit unpack with 256bit vectors, but 8/16bit
+ * unpack only with 128bit vectors).
+ * Note: 16bit sizes would really need matching pack conversion code,
+ * so only the 8bit case is enabled below.
+ */
+ if (!is_1d && dst_channels != 3 && dst_type.width == 8) {
+ twiddle_after_convert = TRUE;
+ }
+
/*
* Pixel twiddle from fragment shader order to memory order
*/
- src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs,
- dst_channels, fs_src, src, pad_inline);
- if (dual_source_blend) {
- generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels,
- fs_src1, src1, pad_inline);
+ if (!twiddle_after_convert) {
+ src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs,
+ dst_channels, fs_src, src, pad_inline);
+ if (dual_source_blend) {
+ generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels,
+ fs_src1, src1, pad_inline);
+ }
+ } else {
+ src_count = num_fullblock_fs * dst_channels;
+ /*
+ * We reorder things a bit here, so the cases for 4-wide and 8-wide
+ * (AVX) turn out the same later when untwiddling/transpose (albeit
+ * for true AVX2 path untwiddle needs to be different).
+ * For now just order by colors first (so we can use unpack later).
+ */
+ for (j = 0; j < num_fullblock_fs; j++) {
+ for (i = 0; i < dst_channels; i++) {
+ src[i*num_fullblock_fs + j] = fs_src[j][i];
+ if (dual_source_blend) {
+ src1[i*num_fullblock_fs + j] = fs_src1[j][i];
+ }
+ }
+ }
}
src_channels = dst_channels < 3 ? dst_channels : 4;
assert(bits == 128 || bits == 256);
}
+ if (twiddle_after_convert) {
+ fs_twiddle_transpose(gallivm, row_type, src, src_count, src);
+ if (dual_source_blend) {
+ fs_twiddle_transpose(gallivm, row_type, src1, src_count, src1);
+ }
+ }
/*
* Blend Colour conversion
*/
blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
- blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
- blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+ blend_color = LLVMBuildPointerCast(builder, blend_color,
+ LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+ blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color,
+ &i32_zero, 1, ""), "");
/* Convert */
lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
mask_type.length = pixels;
mask_type.width = row_type.width * dst_channels;
- src_mask[i] = LLVMBuildIntCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), "");
+ /*
+ * If mask_type width is smaller than 32bit, this doesn't quite
+ * generate the most efficient code (could use some pack).
+ */
+ src_mask[i] = LLVMBuildIntCast(builder, src_mask[i],
+ lp_build_int_vec_type(gallivm, mask_type), "");
mask_type.length *= dst_channels;
mask_type.width /= dst_channels;
}
- src_mask[i] = LLVMBuildBitCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), "");
+ src_mask[i] = LLVMBuildBitCast(builder, src_mask[i],
+ lp_build_int_vec_type(gallivm, mask_type), "");
src_mask[i] = lp_build_pad_vector(gallivm, src_mask[i], row_type.length);
}
if (dst_count > src_count) {
if ((dst_type.width == 8 || dst_type.width == 16) &&
- util_is_power_of_two(dst_type.length) &&
+ util_is_power_of_two_or_zero(dst_type.length) &&
dst_type.length * dst_type.width < 128) {
/*
* Never try to load values as 4xi8 which we will then
* It seems some cleanup could be done here (like skipping conversion/blend
* when not needed).
*/
- convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+ convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type,
+ row_type, dst, src_count);
/*
* FIXME: Really should get logic ops / masks out of generic blend / row
pad_inline ? 4 : dst_channels);
}
- convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+ convert_from_blend_type(gallivm, block_size, out_format_desc,
+ row_type, dst_type, dst, src_count);
/* Split the blend rows back to memory rows */
if (dst_count > src_count) {
unsigned partial_mask)
{
struct gallivm_state *gallivm = variant->gallivm;
- const struct lp_fragment_shader_variant_key *key = &variant->key;
+ struct lp_fragment_shader_variant_key *key = &variant->key;
struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
char func_name[64];
struct lp_type fs_type;
struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
LLVMTypeRef blend_vec_type;
- LLVMTypeRef arg_types[13];
+ LLVMTypeRef arg_types[15];
LLVMTypeRef func_type;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
LLVMValueRef dady_ptr;
LLVMValueRef color_ptr_ptr;
LLVMValueRef stride_ptr;
+ LLVMValueRef color_sample_stride_ptr;
LLVMValueRef depth_ptr;
LLVMValueRef depth_stride;
+ LLVMValueRef depth_sample_stride;
LLVMValueRef mask_input;
LLVMValueRef thread_data_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
+ struct lp_build_image_soa *image;
struct lp_build_interp_soa_context interp;
LLVMValueRef fs_mask[16 / 4];
LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
blend_vec_type = lp_build_vec_type(gallivm, blend_type);
- util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, partial_mask ? "partial" : "whole");
+ snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = variant->jit_context_ptr_type; /* context */
arg_types[1] = int32_type; /* x */
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */
- arg_types[9] = int32_type; /* mask_input */
+ arg_types[9] = LLVMInt64TypeInContext(gallivm->context); /* mask_input */
arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */
arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */
arg_types[12] = int32_type; /* depth_stride */
+ arg_types[13] = LLVMPointerType(int32_type, 0); /* color sample strides */
+ arg_types[14] = int32_type; /* depth sample stride */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
arg_types, ARRAY_SIZE(arg_types), 0);
thread_data_ptr = LLVMGetParam(function, 10);
stride_ptr = LLVMGetParam(function, 11);
depth_stride = LLVMGetParam(function, 12);
+ color_sample_stride_ptr = LLVMGetParam(function, 13);
+ depth_sample_stride = LLVMGetParam(function, 14);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(stride_ptr, "stride_ptr");
lp_build_name(depth_stride, "depth_stride");
+ lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr");
+ lp_build_name(depth_sample_stride, "depth_sample_stride");
/*
* Function body
assert(builder);
LLVMPositionBuilderAtEnd(builder, block);
+ /*
+ * Must not count ps invocations if there's a null shader.
+ * (It would be ok to count with null shader if there's d/s tests,
+ * but only if there's d/s buffers too, which is different
+ * to implicit rasterization disable which must not depend
+ * on the d/s buffers.)
+ * Could use popcount on mask, but pixel accuracy is not required.
+ * Could disable if there's no stats query, but maybe not worth it.
+ */
+ if (shader->info.base.num_instructions > 1) {
+ LLVMValueRef invocs, val;
+ invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr);
+ val = LLVMBuildLoad(builder, invocs, "");
+ val = LLVMBuildAdd(builder, val,
+ LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0),
+ "invoc_count");
+ LLVMBuildStore(builder, val, invocs);
+ }
+
/* code generated texture sampling */
- sampler = lp_llvm_sampler_soa_create(key->state);
+ sampler = lp_llvm_sampler_soa_create(key->samplers);
+ image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
/* for 1d resources only run "upper half" of stamp */
if (partial_mask) {
mask = generate_quad_mask(gallivm, fs_type,
- i*fs_type.length/4, mask_input);
+ i*fs_type.length/4, 0, mask_input);
}
else {
mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
num_loop,
&interp,
sampler,
+ image,
mask_store, /* output */
color_store,
depth_ptr,
}
sampler->destroy(sampler);
-
+ image->destroy(image);
/* Loop over color outputs / color buffers to do blending.
*/
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
static void
-dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
+dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
{
unsigned i;
if (key->flatshade) {
debug_printf("flatshade = 1\n");
}
+ if (key->multisample) {
+ debug_printf("multisample = 1\n");
+ debug_printf("coverage samples = %d\n", key->coverage_samples);
+ }
for (i = 0; i < key->nr_cbufs; ++i) {
debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+ debug_printf("cbuf nr_samples[%u] = %d\n", i, key->cbuf_nr_samples[i]);
}
if (key->depth.enabled || key->stencil[0].enabled) {
debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+ debug_printf("depth nr_samples = %d\n", key->zsbuf_nr_samples);
}
if (key->depth.enabled) {
- debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
}
for (i = 0; i < 2; ++i) {
if (key->stencil[i].enabled) {
- debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
- debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
- debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
- debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
+ debug_printf("stencil[%u].func = %s\n", i, util_str_func(key->stencil[i].func, TRUE));
+ debug_printf("stencil[%u].fail_op = %s\n", i, util_str_stencil_op(key->stencil[i].fail_op, TRUE));
+ debug_printf("stencil[%u].zpass_op = %s\n", i, util_str_stencil_op(key->stencil[i].zpass_op, TRUE));
+ debug_printf("stencil[%u].zfail_op = %s\n", i, util_str_stencil_op(key->stencil[i].zfail_op, TRUE));
debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
}
}
if (key->alpha.enabled) {
- debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
+ debug_printf("alpha.func = %s\n", util_str_func(key->alpha.func, TRUE));
}
if (key->occlusion_count) {
}
if (key->blend.logicop_enable) {
- debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
+ debug_printf("blend.logicop_func = %s\n", util_str_logicop(key->blend.logicop_func, TRUE));
}
else if (key->blend.rt[0].blend_enable) {
- debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE));
- debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
- debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
- debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE));
- debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
- debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
+ debug_printf("blend.rgb_func = %s\n", util_str_blend_func (key->blend.rt[0].rgb_func, TRUE));
+ debug_printf("blend.rgb_src_factor = %s\n", util_str_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
+ debug_printf("blend.rgb_dst_factor = %s\n", util_str_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
+ debug_printf("blend.alpha_func = %s\n", util_str_blend_func (key->blend.rt[0].alpha_func, TRUE));
+ debug_printf("blend.alpha_src_factor = %s\n", util_str_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
+ debug_printf("blend.alpha_dst_factor = %s\n", util_str_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
if (key->blend.alpha_to_coverage) {
debug_printf("blend.alpha_to_coverage is enabled\n");
}
for (i = 0; i < key->nr_samplers; ++i) {
- const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
+ const struct lp_static_sampler_state *sampler = &key->samplers[i].sampler_state;
debug_printf("sampler[%u] = \n", i);
debug_printf(" .wrap = %s %s %s\n",
- util_dump_tex_wrap(sampler->wrap_s, TRUE),
- util_dump_tex_wrap(sampler->wrap_t, TRUE),
- util_dump_tex_wrap(sampler->wrap_r, TRUE));
+ util_str_tex_wrap(sampler->wrap_s, TRUE),
+ util_str_tex_wrap(sampler->wrap_t, TRUE),
+ util_str_tex_wrap(sampler->wrap_r, TRUE));
debug_printf(" .min_img_filter = %s\n",
- util_dump_tex_filter(sampler->min_img_filter, TRUE));
+ util_str_tex_filter(sampler->min_img_filter, TRUE));
debug_printf(" .min_mip_filter = %s\n",
- util_dump_tex_mipfilter(sampler->min_mip_filter, TRUE));
+ util_str_tex_mipfilter(sampler->min_mip_filter, TRUE));
debug_printf(" .mag_img_filter = %s\n",
- util_dump_tex_filter(sampler->mag_img_filter, TRUE));
+ util_str_tex_filter(sampler->mag_img_filter, TRUE));
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
- debug_printf(" .compare_func = %s\n", util_dump_func(sampler->compare_func, TRUE));
+ debug_printf(" .compare_func = %s\n", util_str_func(sampler->compare_func, TRUE));
debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords);
debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod);
}
for (i = 0; i < key->nr_sampler_views; ++i) {
- const struct lp_static_texture_state *texture = &key->state[i].texture_state;
+ const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;
debug_printf("texture[%u] = \n", i);
debug_printf(" .format = %s\n",
util_format_name(texture->format));
debug_printf(" .target = %s\n",
- util_dump_tex_target(texture->target, TRUE));
+ util_str_tex_target(texture->target, TRUE));
debug_printf(" .level_zero_only = %u\n",
texture->level_zero_only);
debug_printf(" .pot = %u %u %u\n",
texture->pot_height,
texture->pot_depth);
}
+ struct lp_image_static_state *images = lp_fs_variant_key_images(key);
+ for (i = 0; i < key->nr_images; ++i) {
+ const struct lp_static_texture_state *image = &images[i].image_state;
+ debug_printf("image[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ util_format_name(image->format));
+ debug_printf(" .target = %s\n",
+ util_str_tex_target(image->target, TRUE));
+ debug_printf(" .level_zero_only = %u\n",
+ image->level_zero_only);
+ debug_printf(" .pot = %u %u %u\n",
+ image->pot_width,
+ image->pot_height,
+ image->pot_depth);
+ }
}
void
-lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
+lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
{
- debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
+ debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
variant->shader->no, variant->no);
- tgsi_dump(variant->shader->base.tokens, 0);
+ if (variant->shader->base.type == PIPE_SHADER_IR_TGSI)
+ tgsi_dump(variant->shader->base.tokens, 0);
+ else
+ nir_print_shader(variant->shader->base.ir.nir, stderr);
dump_fs_variant_key(&variant->key);
debug_printf("variant->opaque = %u\n", variant->opaque);
debug_printf("\n");
const struct lp_fragment_shader_variant_key *key)
{
struct lp_fragment_shader_variant *variant;
- const struct util_format_description *cbuf0_format_desc;
+ const struct util_format_description *cbuf0_format_desc = NULL;
boolean fullcolormask;
char module_name[64];
- variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+ variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
if (!variant)
return NULL;
- util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
- shader->no, shader->variants_created);
+ memset(variant, 0, sizeof(*variant));
+ snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
+ shader->no, shader->variants_created);
variant->gallivm = gallivm_create(module_name, lp->context);
if (!variant->gallivm) {
!key->alpha.enabled &&
!key->blend.alpha_to_coverage &&
!key->depth.enabled &&
- !shader->info.base.uses_kill
+ !shader->info.base.uses_kill &&
+ !shader->info.base.writes_samplemask
? TRUE : FALSE;
- if ((shader->info.base.num_tokens <= 1) &&
- !key->depth.enabled && !key->stencil[0].enabled) {
- variant->ps_inv_multiplier = 0;
- } else {
- variant->ps_inv_multiplier = 1;
- }
-
if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
lp_debug_fs_variant(variant);
}
struct lp_fragment_shader *shader;
int nr_samplers;
int nr_sampler_views;
+ int nr_images;
int i;
shader = CALLOC_STRUCT(lp_fragment_shader);
shader->no = fs_no++;
make_empty_list(&shader->variants);
- /* get/save the summary info for this shader */
- lp_build_tgsi_info(templ->tokens, &shader->info);
+ shader->base.type = templ->type;
+ if (templ->type == PIPE_SHADER_IR_TGSI) {
+ /* get/save the summary info for this shader */
+ lp_build_tgsi_info(templ->tokens, &shader->info);
- /* we need to keep a local copy of the tokens */
- shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+ /* we need to keep a local copy of the tokens */
+ shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+ } else {
+ shader->base.ir.nir = templ->ir.nir;
+ nir_tgsi_scan_shader(templ->ir.nir, &shader->info.base, true);
+ }
shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
if (shader->draw_data == NULL) {
nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
-
- shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
- state[MAX2(nr_samplers, nr_sampler_views)]);
+ nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+ shader->variant_key_size = lp_fs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
for (i = 0; i < shader->info.base.num_inputs; i++) {
shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
+ shader->inputs[i].location = shader->info.base.input_interpolate_loc[i];
switch (shader->info.base.input_interpolate[i]) {
case TGSI_INTERPOLATE_CONSTANT:
llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
- if (llvmpipe->fs == fs)
+ struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs;
+ if (llvmpipe->fs == lp_fs)
return;
- llvmpipe->fs = (struct lp_fragment_shader *) fs;
-
draw_bind_fragment_shader(llvmpipe->draw,
- (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
+ (lp_fs ? lp_fs->draw_data : NULL));
+
+ llvmpipe->fs = lp_fs;
llvmpipe->dirty |= LP_NEW_FS;
}
* Remove shader variant from two lists: the shader's variant list
* and the context's variant list.
*/
-void
+static void
llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant)
{
- if (gallivm_debug & GALLIVM_DEBUG_IR) {
- debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
- " #%u v total cached #%u\n",
- variant->shader->no,
- variant->no,
+ if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
+ debug_printf("llvmpipe: del fs #%u var %u v created %u v cached %u "
+ "v total cached %u inst %u total inst %u\n",
+ variant->shader->no, variant->no,
variant->shader->variants_created,
variant->shader->variants_cached,
- lp->nr_fs_variants);
+ lp->nr_fs_variants, variant->nr_instrs, lp->nr_fs_instrs);
}
gallivm_destroy(variant->gallivm);
/* Delete draw module's data */
draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
+ if (shader->base.ir.nir)
+ ralloc_free(shader->base.ir.nir);
assert(shader->variants_cached == 0);
FREE((void *) shader->base.tokens);
FREE(shader);
static void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
- uint shader, uint index,
+ enum pipe_shader_type shader, uint index,
const struct pipe_constant_buffer *cb)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
}
if (shader == PIPE_SHADER_VERTEX ||
- shader == PIPE_SHADER_GEOMETRY) {
+ shader == PIPE_SHADER_GEOMETRY ||
+ shader == PIPE_SHADER_TESS_CTRL ||
+ shader == PIPE_SHADER_TESS_EVAL) {
/* Pass the constants to the 'draw' module */
const unsigned size = cb ? cb->buffer_size : 0;
const ubyte *data;
draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
index, data, size);
}
- else {
+ else if (shader == PIPE_SHADER_COMPUTE)
+ llvmpipe->cs_dirty |= LP_CSNEW_CONSTANTS;
+ else
llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
- }
if (cb && cb->user_buffer) {
pipe_resource_reference(&constants, NULL);
}
}
+/**
+ * Bind a range of shader buffers for one shader stage.
+ *
+ * Slots start_slot .. start_slot + count - 1 of llvmpipe->ssbos[shader]
+ * are overwritten via util_copy_shader_buffer().  When 'buffers' is NULL,
+ * every slot in the range is handed a NULL descriptor instead.
+ *
+ * For the vertex, geometry and tessellation stages the data pointer and
+ * size of each buffer are also forwarded to the draw module.  For the
+ * compute and fragment stages only the matching dirty bit is raised.
+ * Any other stage value just gets its slots updated.
+ *
+ * 'writable_bitmask' is not read by this function.
+ */
+static void
+llvmpipe_set_shader_buffers(struct pipe_context *pipe,
+ enum pipe_shader_type shader, unsigned start_slot,
+ unsigned count, const struct pipe_shader_buffer *buffers,
+ unsigned writable_bitmask)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i, idx;
+ /* 'i' is the destination slot, 'idx' the index into 'buffers'. */
+ for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+ const struct pipe_shader_buffer *buffer = buffers ? &buffers[idx] : NULL;
+
+ util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer);
+
+ if (shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_GEOMETRY ||
+ shader == PIPE_SHADER_TESS_CTRL ||
+ shader == PIPE_SHADER_TESS_EVAL) {
+ /* These stages are handed to the draw module as (pointer, size). */
+ const unsigned size = buffer ? buffer->buffer_size : 0;
+ const ubyte *data = NULL;
+ if (buffer && buffer->buffer)
+ data = (ubyte *) llvmpipe_resource_data(buffer->buffer);
+ /* Apply the offset only to a non-NULL pointer so that an unbound
+ * slot is still passed on as NULL. */
+ if (data)
+ data += buffer->buffer_offset;
+ draw_set_mapped_shader_buffer(llvmpipe->draw, shader,
+ i, data, size);
+ } else if (shader == PIPE_SHADER_COMPUTE) {
+ llvmpipe->cs_dirty |= LP_CSNEW_SSBOS;
+ } else if (shader == PIPE_SHADER_FRAGMENT) {
+ llvmpipe->dirty |= LP_NEW_FS_SSBOS;
+ }
+ }
+}
+
+/**
+ * Bind a range of image views for one shader stage.
+ *
+ * Slots start_slot .. start_slot + count - 1 of llvmpipe->images[shader]
+ * are overwritten via util_copy_image_view().  When 'images' is NULL,
+ * every slot in the range is handed a NULL view instead.
+ *
+ * llvmpipe->num_images[shader] is then overwritten with start_slot + count;
+ * the previous value is not consulted.
+ *
+ * For the vertex, geometry and tessellation stages the stage's whole image
+ * array is passed to the draw module.  The compute stage raises
+ * LP_CSNEW_IMAGES, and every remaining stage value raises LP_NEW_FS_IMAGES.
+ */
+static void
+llvmpipe_set_shader_images(struct pipe_context *pipe,
+ enum pipe_shader_type shader, unsigned start_slot,
+ unsigned count, const struct pipe_image_view *images)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i, idx;
+
+ /* The draw module is flushed before any view is replaced.
+ * NOTE(review): presumably so that work already queued in draw does not
+ * observe the new views; confirm against draw_flush(). */
+ draw_flush(llvmpipe->draw);
+ /* 'i' is the destination slot, 'idx' the index into 'images'. */
+ for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
+ const struct pipe_image_view *image = images ? &images[idx] : NULL;
+
+ util_copy_image_view(&llvmpipe->images[shader][i], image);
+ }
+
+ llvmpipe->num_images[shader] = start_slot + count;
+ if (shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_GEOMETRY ||
+ shader == PIPE_SHADER_TESS_CTRL ||
+ shader == PIPE_SHADER_TESS_EVAL) {
+ /* draw receives the array from slot 0, with the same count that was
+ * just stored in num_images[shader]. */
+ draw_set_images(llvmpipe->draw,
+ shader,
+ llvmpipe->images[shader],
+ start_slot + count);
+ } else if (shader == PIPE_SHADER_COMPUTE)
+ llvmpipe->cs_dirty |= LP_CSNEW_IMAGES;
+ else
+ llvmpipe->dirty |= LP_NEW_FS_IMAGES;
+}
/**
* Return the blend factor equivalent to a destination alpha of one.
* TODO: there is actually no reason to tie this to context state -- the
* generated code could be cached globally in the screen.
*/
-static void
+static struct lp_fragment_shader_variant_key *
make_variant_key(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
- struct lp_fragment_shader_variant_key *key)
+ char *store)
{
unsigned i;
+ struct lp_fragment_shader_variant_key *key;
- memset(key, 0, shader->variant_key_size);
+ key = (struct lp_fragment_shader_variant_key *)store;
+
+ memset(key, 0, offsetof(struct lp_fragment_shader_variant_key, samplers[1]));
if (lp->framebuffer.zsbuf) {
enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format;
if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
key->resource_1d = TRUE;
}
+ key->zsbuf_nr_samples = util_res_sample_count(lp->framebuffer.zsbuf->texture);
}
/*
if (lp->rasterizer->clip_halfz) {
key->depth_clamp = 1;
} else {
- key->depth_clamp = (lp->rasterizer->depth_clip == 0) ? 1 : 0;
+ key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0;
}
/* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
- if (lp->active_occlusion_queries) {
+ key->multisample = lp->rasterizer->multisample;
+ if (lp->active_occlusion_queries && !lp->queries_disabled) {
key->occlusion_count = TRUE;
}
memcpy(&key->blend, lp->blend, sizeof key->blend);
}
+ key->coverage_samples = 1;
+ if (key->multisample)
+ key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer);
key->nr_cbufs = lp->framebuffer.nr_cbufs;
if (!key->blend.independent_blend_enable) {
const struct util_format_description *format_desc;
key->cbuf_format[i] = format;
+ key->cbuf_nr_samples[i] = util_res_sample_count(lp->framebuffer.cbufs[i]->texture);
/*
* Figure out if this is a 1d resource. Note that OpenGL allows crazy
else {
/* no color buffer for this fragment output */
key->cbuf_format[i] = PIPE_FORMAT_NONE;
+ key->cbuf_nr_samples[i] = 0;
blend_rt->colormask = 0x0;
blend_rt->blend_enable = 0;
}
*/
key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+ struct lp_sampler_static_state *fs_sampler;
+
+ fs_sampler = key->samplers;
+
+ memset(fs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *fs_sampler);
+
for(i = 0; i < key->nr_samplers; ++i) {
if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
- lp_sampler_static_sampler_state(&key->state[i].sampler_state,
+ lp_sampler_static_sampler_state(&fs_sampler[i].sampler_state,
lp->samplers[PIPE_SHADER_FRAGMENT][i]);
}
}
if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
for(i = 0; i < key->nr_sampler_views; ++i) {
- if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
- lp_sampler_static_texture_state(&key->state[i].texture_state,
+ /*
+ * Note sview may exceed what's representable by file_mask.
+ * This will still work, the only downside is that not actually
+ * used views may be included in the shader key.
+ */
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
+ lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
}
}
key->nr_sampler_views = key->nr_samplers;
for(i = 0; i < key->nr_sampler_views; ++i) {
if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
- lp_sampler_static_texture_state(&key->state[i].texture_state,
+ lp_sampler_static_texture_state(&fs_sampler[i].texture_state,
lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
}
}
}
+
+ struct lp_image_static_state *lp_image;
+ lp_image = lp_fs_variant_key_images(key);
+ key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
+ for (i = 0; i < key->nr_images; ++i) {
+ if (shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) {
+ lp_sampler_static_texture_state_image(&lp_image[i].image_state,
+ &lp->images[PIPE_SHADER_FRAGMENT][i]);
+ }
+ }
+ return key;
}
llvmpipe_update_fs(struct llvmpipe_context *lp)
{
struct lp_fragment_shader *shader = lp->fs;
- struct lp_fragment_shader_variant_key key;
+ struct lp_fragment_shader_variant_key *key;
struct lp_fragment_shader_variant *variant = NULL;
struct lp_fs_variant_list_item *li;
+ char store[LP_FS_MAX_VARIANT_KEY_SIZE];
- make_variant_key(lp, shader, &key);
+ key = make_variant_key(lp, shader, store);
/* Search the variants for one which matches the key */
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
- if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
+ if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
unsigned i;
unsigned variants_to_cull;
- if (0) {
+ if (LP_DEBUG & DEBUG_FS) {
debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
lp->nr_fs_variants,
lp->nr_fs_instrs,
}
/* First, check if we've exceeded the max number of shader variants.
- * If so, free 25% of them (the least recently used ones).
+ * If so, free 6.25% of them (the least recently used ones).
*/
- variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
+ variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0;
if (variants_to_cull ||
lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
struct pipe_context *pipe = &lp->pipe;
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("Evicting FS: %u fs variants,\t%u total variants,"
+ "\t%u instrs,\t%u instrs/variant\n",
+ shader->variants_cached,
+ lp->nr_fs_variants, lp->nr_fs_instrs,
+ lp->nr_fs_instrs / lp->nr_fs_variants);
+ }
+
/*
* XXX: we need to flush the context until we have some sort of
* reference counting in fragment shaders as they may still be binned
* Generate the new variant.
*/
t0 = os_time_get();
- variant = generate_variant(lp, shader, &key);
+ variant = generate_variant(lp, shader, key);
t1 = os_time_get();
dt = t1 - t0;
LP_COUNT_ADD(llvm_compile_time, dt);
llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
+
+ llvmpipe->pipe.set_shader_buffers = llvmpipe_set_shader_buffers;
+ llvmpipe->pipe.set_shader_images = llvmpipe_set_shader_images;
}
-/*
- * Rasterization is disabled if there is no pixel shader and
- * both depth and stencil testing are disabled:
- * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
- */
-boolean
-llvmpipe_rasterization_disabled(struct llvmpipe_context *lp)
-{
- boolean null_fs = !lp->fs || lp->fs->info.base.num_tokens <= 1;
- return (null_fs &&
- !lp->depth_stencil->depth.enabled &&
- !lp->depth_stencil->stencil[0].enabled);
-}