+ if (chan_desc.pure_integer) {
+ chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+ LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, type, chan_mask);
+ LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chan, mask_val, "");
+ chan = LLVMBuildSelect(builder, mask, mask_val, chan, "");
+ }
+ else if (type.floating) {
+ if (chan_desc.normalized) {
+ rgba = lp_build_clamp(bld, rgba, bld->zero, bld->one);
+ chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba);
+ } else
+ chan = LLVMBuildFPToSI(builder, rgba, bld->vec_type, "");
+ }
+ if (start)
+ chan = LLVMBuildShl(builder, chan,
+ lp_build_const_int_vec(gallivm, type, start), "");
+ if (!*output)
+ *output = chan;
+ else
+ *output = LLVMBuildOr(builder, *output, chan, "");
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (chan_desc.pure_integer) {
+ chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+ chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, chan_mask), "");
+ } else if (type.floating) {
+ if (chan_desc.normalized) {
+ char intrin[32];
+ double scale = ((1 << (chan_desc.size - 1)) - 1);
+ LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+ rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one);
+ rgba = LLVMBuildFMul(builder, rgba, scale_val, "");
+ lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type);
+ rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba);
+ }
+ chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, "");
+ chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, chan_mask), "");
+ }
+ if (start)
+ chan = LLVMBuildShl(builder, chan,
+ lp_build_const_int_vec(gallivm, type, start), "");
+ if (!*output)
+ *output = chan;
+ else
+ *output = LLVMBuildOr(builder, *output, chan, "");
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if (type.floating) {
+ if (chan_desc.size == 16) {
+ chan = lp_build_float_to_half(gallivm, rgba);
+ chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, "");
+ if (start)
+ chan = LLVMBuildShl(builder, chan,
+ lp_build_const_int_vec(gallivm, type, start), "");
+ if (!*output)
+ *output = chan;
+ else
+ *output = LLVMBuildOr(builder, *output, chan, "");
+ } else {
+ assert(start == 0);
+ assert(stop == 32);
+ assert(type.width == 32);
+ *output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
+ }
+ } else
+ assert(0);
+ break;
+ default:
+ assert(0);
+ *output = bld->undef;
+ }
+}
+
+ /**
+  * Pack the per-channel SoA values of a pixel into one packed vector.
+  *
+  * Only plain-layout formats with a 1x1 block whose bit size does not exceed
+  * the element width of @p type are accepted, and @p type must be 32 bits
+  * wide; all of these preconditions are asserted.  Each channel listed in
+  * @p format_desc is handed in turn to lp_build_insert_soa_chan(), and every
+  * call targets the same @p packed slot.
+  *
+  * @param gallivm      gallivm state used to initialise the build context
+  * @param format_desc  description of the format being packed
+  * @param type         lp_type used to initialise the build context
+  * @param rgba_in      input values, indexed by channel number
+  * @param packed       slot passed unchanged to lp_build_insert_soa_chan()
+  *                     for every channel
+  */
+ static void
+ lp_build_pack_rgba_soa(struct gallivm_state *gallivm,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        const LLVMValueRef rgba_in[4],
+                        LLVMValueRef *packed)
+ {
+    struct lp_build_context ctx;
+
+    /* Preconditions: a plain, single-pixel block that fits in one element. */
+    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
+    assert(format_desc->block.width == 1);
+    assert(format_desc->block.height == 1);
+    assert(format_desc->block.bits <= type.width);
+    /* FIXME: Support more output types */
+    assert(type.width == 32);
+
+    lp_build_context_init(&ctx, gallivm, type);
+
+    /* The channel description is passed by value, one channel at a time. */
+    for (unsigned i = 0; i < format_desc->nr_channels; i++) {
+       lp_build_insert_soa_chan(&ctx,
+                                format_desc->block.bits,
+                                format_desc->channel[i],
+                                packed,
+                                rgba_in[i]);
+    }
+ }
+
+void
+lp_build_store_rgba_soa(struct gallivm_state *gallivm,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef exec_mask,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef out_of_bounds,
+ const LLVMValueRef rgba_in[4])
+{
+ enum pipe_format format = format_desc->format;
+ LLVMValueRef packed[4];
+ unsigned num_stores = 0;
+
+ memset(packed, 0, sizeof(LLVMValueRef) * 4);
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->block.width == 1 &&
+ format_desc->block.height == 1 &&
+ format_desc->block.bits <= type.width &&
+ (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
+ format_desc->channel[0].size == 32 ||
+ format_desc->channel[0].size == 16))
+ {
+ lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]);
+
+ num_stores = 1;
+ } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+ (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
+ format_desc->block.width == 1 &&
+ format_desc->block.height == 1 &&
+ format_desc->block.bits > type.width &&
+ ((format_desc->block.bits <= type.width * type.length &&
+ format_desc->channel[0].size <= type.width) ||
+ (format_desc->channel[0].size == 64 &&
+ format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
+ type.floating)))
+ {
+ /*
+ * Similar to above, but the packed pixel is larger than what fits
+ * into an element of the destination format. The packed pixels will be
+ * shuffled into SoA vectors appropriately, and then the extraction will
+ * be done in parallel as much as possible.
+ * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so
+ * the gathered vectors can be shuffled easily (even with avx).
+ * 64xn float -> 32xn float is handled too but it's a bit special as
+ * it does the conversion pre-shuffle.
+ */
+ struct lp_build_context bld;
+
+ lp_build_context_init(&bld, gallivm, type);
+ assert(type.width == 32);
+ assert(format_desc->block.bits > type.width);
+
+ unsigned store_width = util_next_power_of_two(format_desc->block.bits);
+ num_stores = store_width / type.width;
+ for (unsigned i = 0; i < format_desc->nr_channels; i++) {
+ struct util_format_channel_description chan_desc = format_desc->channel[i];
+ unsigned blockbits = type.width;
+ unsigned vec_nr;
+
+ vec_nr = chan_desc.shift / type.width;
+ chan_desc.shift %= type.width;
+
+ lp_build_insert_soa_chan(&bld, blockbits,
+ chan_desc,
+ &packed[vec_nr],
+ rgba_in[i]);