#include "util/u_dump.h"
#include "util/u_string.h"
#include "util/u_simple_list.h"
+#include "util/u_dual_blend.h"
#include "os/os_time.h"
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
shader->info.base.num_inputs < 3 &&
shader->info.base.num_instructions < 8);
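+ /*
+ * util_blend_state_is_dual() returns TRUE if any rt 0 blend factor
+ * references the second source color (PIPE_BLENDFACTOR_SRC1_COLOR,
+ * PIPE_BLENDFACTOR_SRC1_ALPHA or their inverses).
+ */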
+ const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&key->blend, 0);
unsigned attrib;
unsigned chan;
unsigned cbuf;
color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
}
}
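+ /* dual source blending needs a second color output, always stored in color[1] */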
+ if (dual_source_blend) {
+ assert(key->nr_cbufs <= 1);
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ color[1][chan] = lp_build_alloca(gallivm, vec_type, "color1");
+ }
+ }
/* do triangle edge testing */
if (partial_mask) {
/* Color write */
for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
{
- if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
- shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
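+ /*
+ * With dual source blending there is only one cbuf; semantic index 1
+ * selects the second blend source, not a second color buffer.
+ */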
+ if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+ ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
{
- unsigned cbuf = shader->info.base.output_semantic_index[attrib];
for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
shader->info.base.num_inputs < 3 &&
shader->info.base.num_instructions < 8);
+ const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&key->blend, 0);
unsigned attrib;
unsigned chan;
unsigned cbuf;
num_loop, "color");
}
}
-
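+ /* dual source blending again needs the second color output at index 1 */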
+ if (dual_source_blend) {
+ assert(key->nr_cbufs <= 1);
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ out_color[1][chan] = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm,
+ type),
+ num_loop, "color1");
+ }
+ }
/* 'mask' will control execution based on quad's pixel alive/killed state */
/* Color write */
for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
{
- if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
- shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
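+ /* with dual source blending, semantic index 1 is the second blend source */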
+ if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+ ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
{
- unsigned cbuf = shader->info.base.output_semantic_index[attrib];
for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
if(outputs[attrib][chan]) {
/* XXX: just initialize outputs to point at colors[] and
}
+/**
+ * Convert alpha to the same blend type as src.
+ */
+static void
+convert_alpha(struct gallivm_state *gallivm,
+ struct lp_type row_type,
+ struct lp_type alpha_type,
+ const unsigned block_size,
+ const unsigned block_height,
+ const unsigned src_count,
+ const unsigned dst_channels,
+ const bool pad_inline,
+ LLVMValueRef* src_alpha)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ unsigned i, j;
+ unsigned length = row_type.length;
+ row_type.length = alpha_type.length;
+
+ /* Twiddle the alpha to match pixels */
+ lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, 4, src_alpha);
+
+ for (i = 0; i < 4; ++i) {
+ lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, &src_alpha[i], 1);
+ }
+
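+ /* src_alpha now uses row_type's element format; restore the full row length */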
+ alpha_type = row_type;
+ row_type.length = length;
+
+ /* If there's only one channel, we only need a single alpha value per pixel */
+ if (src_count == 1) {
+ assert(dst_channels == 1);
+
+ lp_build_concat_n(gallivm, alpha_type, src_alpha, 4, src_alpha, src_count);
+ } else {
+ /* If there are more srcs than rows then we need to split alpha up */
+ if (src_count > block_height) {
+ for (i = src_count; i > 0; --i) {
+ unsigned pixels = block_size / src_count;
+ unsigned idx = i - 1;
+
+ src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4], (idx * pixels) % 4, pixels);
+ }
+ }
+
+ /* If there is a src for each pixel, broadcast the alpha across the whole row */
+ if (src_count == block_size) {
+ for (i = 0; i < src_count; ++i) {
+ src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+ }
+ } else {
+ unsigned pixels = block_size / src_count;
+ unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
+ unsigned alpha_span = 1;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+ /* Check if we need 2 src_alphas for our shuffles */
+ if (pixels > alpha_type.length) {
+ alpha_span = 2;
+ }
+
+ /* Broadcast alpha across all channels, e.g. a1a2 to a1a1a1a1a2a2a2a2 */
+ for (j = 0; j < row_type.length; ++j) {
+ if (j < pixels * channels) {
+ shuffles[j] = lp_build_const_int32(gallivm, j / channels);
+ } else {
+ shuffles[j] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+ }
+ }
+
+ for (i = 0; i < src_count; ++i) {
+ unsigned idx1 = i, idx2 = i;
+
+ if (alpha_span > 1) {
+ idx1 *= alpha_span;
+ idx2 = idx1 + 1;
+ }
+
+ src_alpha[i] = LLVMBuildShuffleVector(builder,
+ src_alpha[idx1],
+ src_alpha[idx2],
+ LLVMConstVector(shuffles, row_type.length),
+ "");
+ }
+ }
+ }
+}
+
+
/**
* Generates the blend function for unswizzled colour buffers
* Also generates the read & write from colour buffer
unsigned int num_fs,
struct lp_type fs_type,
LLVMValueRef* fs_mask,
- LLVMValueRef fs_out_color[TGSI_NUM_CHANNELS][4],
+ LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][4],
LLVMValueRef context_ptr,
LLVMValueRef color_ptr,
LLVMValueRef stride,
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
+ LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS];
LLVMValueRef src_alpha[4 * 4];
+ LLVMValueRef src1_alpha[4 * 4];
LLVMValueRef src_mask[4 * 4];
LLVMValueRef src[4 * 4];
+ LLVMValueRef src1[4 * 4];
LLVMValueRef dst[4 * 4];
LLVMValueRef blend_color;
LLVMValueRef blend_alpha;
struct lp_build_mask_context mask_ctx;
struct lp_type mask_type;
struct lp_type blend_type;
- struct lp_type alpha_type;
struct lp_type row_type;
struct lp_type dst_type;
unsigned char swizzle[TGSI_NUM_CHANNELS];
unsigned vector_width;
+ unsigned src_channels = TGSI_NUM_CHANNELS;
unsigned dst_channels;
- unsigned src_channels;
unsigned dst_count;
unsigned src_count;
unsigned i, j;
bool pad_inline = is_arithmetic_format(out_format_desc);
bool has_alpha = false;
+ const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&variant->key.blend, 0);
- src_channels = TGSI_NUM_CHANNELS;
mask_type = lp_int32_vec4_type();
mask_type.length = fs_type.length;
*/
for (i = 0; i < num_fs; ++i) {
/* Always load alpha for use in blending */
- LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[alpha_channel][i], "");
+ LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
/* Load each channel */
for (j = 0; j < dst_channels; ++j) {
- fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[swizzle[j]][i], "");
+ fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
}
/* If 3 channels then pad to include alpha for 4 element transpose */
+ /*
+ * XXX: if we include alpha here, maybe we could actually use it
+ * instead of the separate alpha for blending?
+ */
if (dst_channels == 3 && !has_alpha) {
fs_src[i][3] = alpha;
- swizzle[3] = 3;
}
/* We split the row_mask and row_alpha as we want 128bit interleave */
src_alpha[i] = alpha;
}
}
+ if (dual_source_blend) {
+ /* same as above except different src/dst, skip masks and comments... */
+ for (i = 0; i < num_fs; ++i) {
+ LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+
+ for (j = 0; j < dst_channels; ++j) {
+ fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+ }
+ if (dst_channels == 3 && !has_alpha) {
+ fs_src1[i][3] = alpha;
+ }
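+ /* an 8-wide alpha vector covers two quads: split it into 4-wide halves */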
+ if (fs_type.length == 8) {
+ src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
+ src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+ } else {
+ src1_alpha[i] = alpha;
+ }
+ }
+ }
if (util_format_is_pure_integer(out_format)) {
/*
}
}
-
/*
* Pixel twiddle from fragment shader order to memory order
*/
src_count = generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, fs_src, src, pad_inline);
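+ /* src1 has the same type and count, hence twiddles to the same src_count */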
+ if (dual_source_blend) {
+ generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, fs_src1, src1, pad_inline);
+ }
+
src_channels = dst_channels < 3 ? dst_channels : 4;
if (src_count != num_fs * src_channels) {
unsigned ds = src_count / (num_fs * src_channels);
}
blend_type = row_type;
- alpha_type = fs_type;
- alpha_type.length = 4;
mask_type.length = 4;
/* Convert src to row_type */
- src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src);
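+ /*
+ * Both sources must end up in the same row format, so convert src1
+ * starting from a copy of row_type (lp_build_conv_auto modifies the
+ * type it is given).
+ */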
+ if (dual_source_blend) {
+ struct lp_type old_row_type = row_type;
+ lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src);
+ src_count = lp_build_conv_auto(gallivm, fs_type, &old_row_type, src1, src_count, src1);
+ }
+ else {
+ src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src);
+ }
/* If the rows are not an SSE vector, combine them to become SSE size! */
if ((row_type.width * row_type.length) % 128) {
dst_count = src_count / (vector_width / bits);
combined = lp_build_concat_n(gallivm, row_type, src, src_count, src, dst_count);
+ if (dual_source_blend) {
+ lp_build_concat_n(gallivm, row_type, src1, src_count, src1, dst_count);
+ }
row_type.length *= combined;
src_count /= combined;
* Alpha conversion
*/
if (!has_alpha) {
- unsigned length = row_type.length;
- row_type.length = alpha_type.length;
-
- /* Twiddle the alpha to match pixels */
- lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, 4, src_alpha);
-
- for (i = 0; i < 4; ++i) {
- lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, &src_alpha[i], 1);
- }
-
- alpha_type = row_type;
- row_type.length = length;
-
- /* If only one channel we can only need the single alpha value per pixel */
- if (src_count == 1) {
- assert(dst_channels == 1);
-
- lp_build_concat_n(gallivm, alpha_type, src_alpha, 4, src_alpha, src_count);
- } else {
- /* If there are more srcs than rows then we need to split alpha up */
- if (src_count > block_height) {
- for (i = src_count; i > 0; --i) {
- unsigned pixels = block_size / src_count;
- unsigned idx = i - 1;
-
- src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4], (idx * pixels) % 4, pixels);
- }
- }
-
- /* If there is a src for each pixel broadcast the alpha across whole row */
- if (src_count == block_size) {
- for (i = 0; i < src_count; ++i) {
- src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
- }
- } else {
- unsigned pixels = block_size / src_count;
- unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
- unsigned alpha_span = 1;
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-
- /* Check if we need 2 src_alphas for our shuffles */
- if (pixels > alpha_type.length) {
- alpha_span = 2;
- }
-
- /* Broadcast alpha across all channels, e.g. a1a2 to a1a1a1a1a2a2a2a2 */
- for (j = 0; j < row_type.length; ++j) {
- if (j < pixels * channels) {
- shuffles[j] = lp_build_const_int32(gallivm, j / channels);
- } else {
- shuffles[j] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
- }
- }
-
- for (i = 0; i < src_count; ++i) {
- unsigned idx1 = i, idx2 = i;
-
- if (alpha_span > 1){
- idx1 *= alpha_span;
- idx2 = idx1 + 1;
- }
-
- src_alpha[i] = LLVMBuildShuffleVector(builder,
- src_alpha[idx1],
- src_alpha[idx2],
- LLVMConstVector(shuffles, row_type.length),
- "");
- }
- }
+ struct lp_type alpha_type = fs_type;
+ alpha_type.length = 4;
+ convert_alpha(gallivm, row_type, alpha_type,
+ block_size, block_height,
+ src_count, dst_channels,
+ pad_inline, src_alpha);
+ if (dual_source_blend) {
+ convert_alpha(gallivm, row_type, alpha_type,
+ block_size, block_height,
+ src_count, dst_channels,
+ pad_inline, src1_alpha);
}
}
rt,
src[i],
has_alpha ? NULL : src_alpha[i],
- NULL,
+ src1[i],
+ has_alpha ? NULL : src1_alpha[i],
dst[i],
partial_mask ? src_mask[i] : NULL,
blend_color,
unsigned cbuf;
boolean cbuf0_write_all;
boolean try_loop = TRUE;
+ const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&key->blend, 0);
assert(lp_native_vector_width / 32 >= 4);
mask_input,
thread_data_ptr);
- for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+ for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
fs_out_color[cbuf][chan][i] =
out_color[cbuf * !cbuf0_write_all][chan];
+ }
+ if (dual_source_blend) {
+ /* we only support one dual source blend target, hence always use output 1 */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
+ fs_out_color[1][chan][i] =
+ out_color[1][chan];
+ }
}
}
else {
fs_out_color[cbuf][chan][i] = ptr;
}
}
+ if (dual_source_blend) {
+ /* we only support one dual source blend target, hence always use output 1 */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ ptr = LLVMBuildGEP(builder,
+ color_store[1][chan],
+ &indexi, 1, "");
+ fs_out_color[1][chan][i] = ptr;
+ }
+ }
}
}
"");
generate_unswizzled_blend(gallivm, cbuf, variant, key->cbuf_format[cbuf],
- num_fs, fs_type, fs_mask, fs_out_color[cbuf],
+ num_fs, fs_type, fs_mask, fs_out_color,
context_ptr, color_ptr, stride, partial_mask, do_branch);
}