#include "lp_bld_type.h"
#include "lp_bld_arit.h"
+#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_quad.h"
+#include "lp_bld_pack.h"
static const unsigned char
return lp_build_sub(bld, a_bottom, a_top);
}
-
+/*
+ * Helper for building packed ddx/ddy vector for one coord (scalar per quad
+ * values). The vector will look like this (8-wide):
+ * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
+ * This only requires one shuffle instead of two for more straightforward packing.
+ *
+ * Per quad the result is (swizzle2 of a) - (swizzle1 of a), i.e.:
+ *   slot 0: TOP_RIGHT - TOP_LEFT     (ddx)
+ *   slot 2: TOP_LEFT  - BOTTOM_LEFT  (ddy with its sign flipped, since ddy
+ *                                     is bottom minus top)
+ * Slots 1 and 3 are don't-care lanes; callers must not rely on their contents.
+ *
+ * bld: build context; bld->type.floating selects a float or integer subtract.
+ * a:   input vector, expected to hold the coordinate in quad layout.
+ * Returns the packed difference vector (LLVM value name "ddxddy").
+ */
LLVMValueRef
-lp_build_scalar_ddx(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
+ LLVMValueRef a)
{
- LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
- LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
- LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return lp_build_sub(bld, a_right, a_left);
+ struct gallivm_state *gallivm = bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef vec1, vec2;
+
+ /* use aos swizzle helper (presumably it applies the 4-entry swizzle to
+  * every group of four elements, i.e. to every quad -- confirm) */
+
+ static const unsigned char swizzle1[] = { /* no-op swizzle: the used lanes (0 and 2) keep their own element */
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
+ };
+ static const unsigned char swizzle2[] = {
+ LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
+ };
+ /* vec1 is the subtrahend, vec2 the minuend of the final subtract. */
+ vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
+ vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
+ /* vec2 - vec1 gives ddx in slot 0 and -ddy in slot 2 of each quad. */
+ if (bld->type.floating)
+ return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
+ else
+ return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
}
+/*
+ * Helper for building packed ddx/ddy vector for two coords (scalar per quad
+ * values). The vector will look like this (8-wide):
+ * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ * This only needs 2 (v)shufps.
+ *
+ * Per quad the result is built as (second shuffle) - (first shuffle):
+ *   slot 0: a.TOP_RIGHT   - a.TOP_LEFT   (ddx of a)
+ *   slot 1: a.BOTTOM_LEFT - a.TOP_LEFT   (ddy of a)
+ *   slot 2: b.TOP_RIGHT   - b.TOP_LEFT   (ddx of b)
+ *   slot 3: b.BOTTOM_LEFT - b.TOP_LEFT   (ddy of b)
+ *
+ * bld: build context; bld->type.length is the vector length and
+ *      bld->type.floating selects a float or integer subtract.
+ * a:   first coordinate vector (s in the layout above).
+ * b:   second coordinate vector (t in the layout above).
+ * Returns the packed difference vector (LLVM value name "ddxddyddxddy").
+ */
LLVMValueRef
-lp_build_scalar_ddy(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
+ LLVMValueRef a, LLVMValueRef b)
{
- LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
- LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
- LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return lp_build_sub(bld, a_bottom, a_top);
+ struct gallivm_state *gallivm = bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ /* One shuffle index per vector element (not per quad): the loop below
+  * writes indices 0 .. length-1, so the arrays need full vector length. */
+ LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef vec1, vec2;
+ unsigned length, num_quads, i;
+
+ /* XXX: do hsub version */
+ length = bld->type.length;
+ assert(length <= LP_MAX_VECTOR_LENGTH);
+ num_quads = length / 4;
+ for (i = 0; i < num_quads; i++) {
+ /* Shuffle indices below 'length' pick from a, indices from 'length'
+  * upwards pick from b (second shufflevector operand). */
+ unsigned s1 = 4 * i;
+ unsigned s2 = 4 * i + length;
+ shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
+ shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
+ shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
+ shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
+ shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
+ shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
+ shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
+ shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
+ }
+ /* vec1 holds the TOP_LEFT reference values, vec2 the neighbours. */
+ vec1 = LLVMBuildShuffleVector(builder, a, b,
+ LLVMConstVector(shuffles1, length), "");
+ vec2 = LLVMBuildShuffleVector(builder, a, b,
+ LLVMConstVector(shuffles2, length), "");
+ if (bld->type.floating)
+ return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
+ else
+ return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
+}
+
+
+/**
+ * Twiddle from quad format to row format
+ *
+ *   src0      src1
+ * ######### #########     #################
+ * # 0 | 1 # # 4 | 5 #     # 0 | 1 | 4 | 5 # dst0
+ * #---+---# #---+---#  -> #################
+ * # 2 | 3 # # 6 | 7 #     # 2 | 3 | 6 | 7 # dst1
+ * ######### #########     #################
+ *
+ * Sources are consumed in pairs (src[i], src[i + 1]) and each pair produces
+ * the pair (dst[i], dst[i + 1]); src_count must therefore be even.
+ *
+ * gallivm:     state providing the LLVM builder.
+ * lp_dst_type: type of the result vectors; the sources are bitcast from and
+ *              the results bitcast back to vectors of this overall size.
+ * src:         array of src_count input vectors.
+ * src_count:   number of entries in src (and written to dst).
+ * dst:         array receiving src_count output vectors.
+ */
+void
+lp_bld_quad_twiddle(struct gallivm_state *gallivm,
+ struct lp_type lp_dst_type,
+ const LLVMValueRef* src,
+ unsigned src_count,
+ LLVMValueRef* dst)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef dst_type_ref;
+ LLVMTypeRef type2_ref;
+ struct lp_type type2;
+ unsigned i;
+
+ assert((src_count % 2) == 0);
+
+ /* Create a type with only 2 elements: same total bit size as lp_dst_type,
+  * split into two halves (top row / bottom row of a quad in the diagram).
+  * It is marked non-floating; presumably so the wide halves are moved
+  * around as opaque integer bits -- confirm. */
+ type2 = lp_dst_type;
+ type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
+ type2.length = 2;
+ type2.floating = 0;
+
+ type2_ref = lp_build_vec_type(gallivm, type2);
+ dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);
+
+ for (i = 0; i < src_count; i += 2) {
+ LLVMValueRef src0, src1;
+ /* Both sources are read before either dst entry of the pair is written. */
+ src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, "");
+ src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, "");
+ /* Last argument 0/1 presumably selects the low/high interleave -- confirm. */
+ dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0);
+ dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1);
+ /* Reinterpret the 2-element results as the requested destination type. */
+ dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, "");
+ dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, "");
+ }
}