#include "lp_tex_sample.h"
#include "lp_flush.h"
#include "lp_state_fs.h"
+#include "lp_rast.h"
/** Fragment shader number (for debugging) */
bool twiddle;
bool split;
- unsigned pixels = num_fs == 4 ? 1 : 2;
+ unsigned pixels = type.length / 4;
unsigned reorder_group;
unsigned src_channels;
unsigned src_count;
src_channels = dst_channels < 3 ? dst_channels : 4;
src_count = num_fs * src_channels;
- assert(pixels == 2 || num_fs == 4);
+ assert(pixels == 2 || pixels == 1);
assert(num_fs * src_channels <= Elements(src));
/*
*/
static void
convert_to_blend_type(struct gallivm_state *gallivm,
+ unsigned block_size,
const struct util_format_description *src_fmt,
struct lp_type src_type,
struct lp_type dst_type,
struct lp_type blend_type;
struct lp_type mem_type;
unsigned i, j, k;
- unsigned pixels = 16 / num_srcs;
+ unsigned pixels = block_size / num_srcs;
bool is_arith;
/*
assert(dst_type.floating);
assert(dst_type.width == 32);
assert(dst_type.length % 4 == 0);
+ assert(num_srcs % 4 == 0);
+
for (i = 0; i < 4; i++) {
tmpsrc[i] = src[i];
}
for (i = 0; i < num_srcs / 4; i++) {
LLVMValueRef tmpsoa[4];
LLVMValueRef tmps = tmpsrc[i];
- if (num_srcs == 8) {
+ if (dst_type.length == 8) {
LLVMValueRef shuffles[8];
unsigned j;
/* fetch was 4 values but need 8-wide output values */
*/
static void
convert_from_blend_type(struct gallivm_state *gallivm,
+ unsigned block_size,
const struct util_format_description *src_fmt,
struct lp_type src_type,
struct lp_type dst_type,
struct lp_type mem_type;
struct lp_type blend_type;
LLVMBuilderRef builder = gallivm->builder;
- unsigned pixels = 16 / num_srcs;
+ unsigned pixels = block_size / num_srcs;
bool is_arith;
/*
assert(src_type.width == 32);
assert(src_type.length % 4 == 0);
assert(dst_type.width == 32);
+
for (i = 0; i < num_srcs / 4; i++) {
LLVMValueRef tmpsoa[4], tmpdst;
lp_build_transpose_aos(gallivm, src_type, &src[i * 4], tmpsoa);
tmpdst = lp_build_float_to_r11g11b10(gallivm, tmpsoa);
- if (num_srcs == 8) {
+ if (src_type.length == 8) {
LLVMValueRef tmpaos, shuffles[8];
unsigned j;
/*
row_type.length = alpha_type.length;
/* Twiddle the alpha to match pixels */
- lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, 4, src_alpha);
+ lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, block_height, src_alpha);
- for (i = 0; i < 4; ++i) {
+ /*
+ * TODO this should use single lp_build_conv call for
+ * src_count == 1 && dst_channels == 1 case (dropping the concat below)
+ */
+ for (i = 0; i < block_height; ++i) {
lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, &src_alpha[i], 1);
}
row_type.length = length;
/* If only one channel we only need the single alpha value per pixel */
- if (src_count == 1) {
- assert(dst_channels == 1);
+ if (src_count == 1 && dst_channels == 1) {
- lp_build_concat_n(gallivm, alpha_type, src_alpha, 4, src_alpha, src_count);
+ lp_build_concat_n(gallivm, alpha_type, src_alpha, block_height, src_alpha, src_count);
} else {
/* If there are more srcs than rows then we need to split alpha up */
if (src_count > block_height) {
unsigned pixels = block_size / src_count;
unsigned idx = i - 1;
- src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4], (idx * pixels) % 4, pixels);
+ src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4],
+ (idx * pixels) % 4, pixels);
}
}
boolean do_branch)
{
const unsigned alpha_channel = 3;
- const unsigned block_width = 4;
- const unsigned block_height = 4;
+ const unsigned block_width = LP_RASTER_BLOCK_SIZE;
+ const unsigned block_height = LP_RASTER_BLOCK_SIZE;
const unsigned block_size = block_width * block_height;
const unsigned lp_integer_vector_width = 128;
/*
* Pixel twiddle from fragment shader order to memory order
*/
- src_count = generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, fs_src, src, pad_inline);
+ src_count = generate_fs_twiddle(gallivm, fs_type, num_fs,
+ dst_channels, fs_src, src, pad_inline);
if (dual_source_blend) {
- generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels, fs_src1, src1, pad_inline);
+ generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels,
+ fs_src1, src1, pad_inline);
}
src_channels = dst_channels < 3 ? dst_channels : 4;
unsigned bits = row_type.width * row_type.length;
unsigned combined;
+ assert(src_count >= (vector_width / bits));
+
dst_count = src_count / (vector_width / bits);
+
combined = lp_build_concat_n(gallivm, row_type, src, src_count, src, dst_count);
if (dual_source_blend) {
lp_build_concat_n(gallivm, row_type, src1, src_count, src1, dst_count);
/*
* Mask conversion
*/
- lp_bld_quad_twiddle(gallivm, mask_type, &src_mask[0], 4, &src_mask[0]);
+ lp_bld_quad_twiddle(gallivm, mask_type, &src_mask[0], block_height, &src_mask[0]);
if (src_count < block_height) {
lp_build_concat_n(gallivm, mask_type, src_mask, 4, src_mask, src_count);
unsigned pixels = block_size / src_count;
unsigned idx = i - 1;
- src_mask[idx] = lp_build_extract_range(gallivm, src_mask[(idx * pixels) / 4], (idx * pixels) % 4, pixels);
+ src_mask[idx] = lp_build_extract_range(gallivm, src_mask[(idx * pixels) / 4],
+ (idx * pixels) % 4, pixels);
}
}
dst_count = src_count;
}
- dst_type.length *= 16 / dst_count;
+ dst_type.length *= block_size / dst_count;
if (out_format == PIPE_FORMAT_R11G11B10_FLOAT) {
/*
* It seems some cleanup could be done here (like skipping conversion/blend
* when not needed).
*/
- convert_to_blend_type(gallivm, out_format_desc, dst_type, row_type, dst, src_count);
+ convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
for (i = 0; i < src_count; ++i) {
dst[i] = lp_build_blend_aos(gallivm,
pad_inline ? 4 : dst_channels);
}
- convert_from_blend_type(gallivm, out_format_desc, row_type, dst_type, dst, src_count);
+ convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
/* Split the blend rows back to memory rows */
if (dst_count > src_count) {
src_count *= 2;
}
-
/*
* Store blend result to memory
*/