LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
struct lp_type src_type,
+ unsigned chan_bits,
LLVMValueRef src);
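A minimal sketch of how a caller might supply the new argument, assuming the gallium util_format helpers are in scope; gallivm, type and red_unpacked are placeholders and the 565 sRGB format is only an example (the in-tree caller in the next hunk simply passes the channel width it already has):

/* Illustrative only, not part of the patch. */
const struct util_format_description *desc =
   util_format_description(PIPE_FORMAT_B5G6R5_SRGB);
unsigned chan_bits = desc->channel[desc->swizzle[0]].size;   /* 5 bits of red */
LLVMValueRef red_linear =
   lp_build_srgb_to_linear(gallivm, lp_uint_type(type), chan_bits,
                           red_unpacked);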
if (type.floating) {
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- assert(width == 8);
if (format_desc->swizzle[3] == chan) {
input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
}
else {
struct lp_type conv_type = lp_uint_type(type);
- input = lp_build_srgb_to_linear(gallivm, conv_type, input);
+ input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
}
}
else {
* (3rd order polynomial is required for crappy but just sufficient accuracy)
*
* @param src integer (vector) value(s) to convert
- * (8 bit values unpacked to 32 bit already).
+ * (chan_bits-bit values unpacked to 32 bit already).
*/
LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
struct lp_type src_type,
+ unsigned chan_bits,
LLVMValueRef src)
{
struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
};
assert(src_type.width == 32);
+ /* Technically this would work with more bits too but would be inaccurate. */
+ assert(chan_bits <= 8);
lp_build_context_init(&f32_bld, gallivm, f32_type);
*/
/* doing the 1/255 mul as part of the approximation */
srcf = lp_build_int_to_float(&f32_bld, src);
+ if (chan_bits != 8) {
+ /* could adjust all the constants instead */
+ LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
+ 255.0f / ((1 << chan_bits) - 1));
+ srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
+ }
lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
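The polynomial constants are tuned for inputs on the 0..255 scale, which is why the added rescale maps an n-bit value onto that scale first (for a 5-bit channel the factor is 255/31, so the maximum 31 becomes 255). For comparison, a scalar sketch of the exact decode the polynomial approximates, using the standard sRGB constants rather than the tuned 12.6 above; hypothetical helper, not part of the patch:

#include <math.h>

/* Exact per-channel sRGB decode for an n-bit value x in [0, 2^n - 1]. */
static float
srgb_chan_to_linear_ref(unsigned x, unsigned chan_bits)
{
   float s = (float)x / (float)((1u << chan_bits) - 1);
   return (s <= 0.04045f) ? s / 12.92f
                          : powf((s + 0.055f) / 1.055f, 2.4f);
}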
static LLVMValueRef
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
struct lp_type src_type,
+ unsigned chan_bits,
LLVMValueRef src)
{
LLVMBuilderRef builder = gallivm->builder;
is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
+ if (chan_bits != 8) {
+ /* could adjust all the constants instead */
+ LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
+ ((1 << chan_bits) - 1) / 255.0f);
+ tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
+ }
+
f32_bld.type.sign = 0;
return lp_build_iround(&f32_bld, tmp);
}
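The encode direction mirrors this: the result is computed on the 0..255 scale and the added multiply by (2^n - 1)/255 rescales it to the destination channel width before rounding (e.g. 1.0 ends up as 63 in a 6-bit green channel). A scalar sketch of the exact encode being approximated; hypothetical helper, not part of the patch:

#include <math.h>

/* Exact per-channel sRGB encode of lin (assumed clamped to [0, 1]) to n bits. */
static unsigned
linear_to_srgb_chan_ref(float lin, unsigned chan_bits)
{
   float s = (lin <= 0.0031308f) ? lin * 12.92f
                                 : 1.055f * powf(lin, 1.0f / 2.4f) - 0.055f;
   return (unsigned)(s * (float)((1u << chan_bits) - 1) + 0.5f);
}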
/**
* Convert linear float soa values to packed srgb AoS values.
* This only handles packed formats which are 4x8bit in size
- * (rgba and rgbx plus swizzles).
+ * (rgba and rgbx plus swizzles), and 16bit 565-style formats
+ * with no alpha. (In the latter case the return values won't be
+ * fully packed; they will look like r5g6b5x16r5g6b5x16...)
*
* @param src float SoA (vector) values to convert.
*/
/* rgb is subject to linear->srgb conversion, alpha is not */
for (chan = 0; chan < 3; chan++) {
- tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
+ unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
+ tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
}
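For a concrete picture of what the per-channel lookup produces, a sketch assuming gallium's usual description of the 565 sRGB format (a classic rgba8 sRGB destination yields 8 for all three channels, i.e. the previous behavior); fmt565 is a hypothetical name:

/* Illustrative only, not part of the patch. */
const struct util_format_description *fmt565 =
   util_format_description(PIPE_FORMAT_B5G6R5_SRGB);
unsigned r_bits = fmt565->channel[fmt565->swizzle[0]].size;   /* 5 */
unsigned g_bits = fmt565->channel[fmt565->swizzle[1]].size;   /* 6 */
unsigned b_bits = fmt565->channel[fmt565->swizzle[2]].size;   /* 5 */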
/*
* can't use lp_build_conv since we want to keep values as 32bit
if (bind & PIPE_BIND_RENDER_TARGET) {
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ /* this is a lie, actually; other formats COULD exist where we would fail */
if (format_desc->nr_channels < 3)
return FALSE;
}
unsigned chan;
if (format_expands_to_float_soa(format_desc)) {
- /* just make this a 32bit uint */
+ /* just make this a uint with the width of the block */
type->floating = false;
type->fixed = false;
type->sign = false;
type->norm = false;
- type->width = 32;
+ type->width = format_desc->block.bits;
type->length = 1;
return;
}
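To illustrate what the block width works out to for the two format classes involved, assuming the usual gallium format descriptions; not part of the patch:

/* 4x8bit sRGB formats keep fetching through a 32-bit uint ... */
assert(util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB)->block.bits == 32);
/* ... while the 16-bit 565 sRGB format now fetches through a 16-bit uint. */
assert(util_format_description(PIPE_FORMAT_B5G6R5_SRGB)->block.bits == 16);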
* This is pretty suboptimal for this case; blending in SoA would be much
* better, since the conversion gets us SoA values which then need to be
* converted back.
*/
- assert(src_type.width == 32);
+ assert(src_type.width == 32 || src_type.width == 16);
assert(dst_type.floating);
assert(dst_type.width == 32);
assert(dst_type.length % 4 == 0);
assert(num_srcs % 4 == 0);
+ if (src_type.width == 16) {
+ /* expand 4x16bit values to 4x32bit */
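+ /* (after widening, the existing 32-bit path below applies unchanged) */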
+ struct lp_type type32x4 = src_type;
+ LLVMTypeRef ltype32x4;
+ unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+ type32x4.width = 32;
+ ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+ for (i = 0; i < num_fetch; i++) {
+ src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+ }
+ src_type.width = 32;
+ }
for (i = 0; i < 4; i++) {
tmpsrc[i] = src[i];
}
assert(src_type.floating);
assert(src_type.width == 32);
assert(src_type.length % 4 == 0);
- assert(dst_type.width == 32);
+ assert(dst_type.width == 32 || dst_type.width == 16);
for (i = 0; i < num_srcs / 4; i++) {
LLVMValueRef tmpsoa[4], tmpdst;
src[i] = tmpdst;
}
}
+ if (dst_type.width == 16) {
+ struct lp_type type16x8 = dst_type;
+ struct lp_type type32x4 = dst_type;
+ LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+ unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+ type16x8.length = 8;
+ type32x4.width = 32;
+ ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+ ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+ ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+ /* We could do vector truncation but it doesn't generate very good code */
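+ /*
+  * Packing against a zero vector leaves the four 16-bit results in the
+  * low half of a 128-bit vector; bitcasting to i128, truncating to i64
+  * (little endian) and bitcasting back yields them as a <4 x i16>.
+  */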
+ for (i = 0; i < num_fetch; i++) {
+ src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+ src[i], lp_build_zero(gallivm, type32x4));
+ src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+ src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+ src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+ }
+ }
return;
}