+#include "util/u_math.h"
+
/* For work registers, we can subdivide in various ways. So we create
 * classes for the various sizes and conflict accordingly, keeping in
 * mind that physical registers are divided along 128-bit boundaries.
 * The important part is that 128-bit boundaries are not crossed.
 *
 * For each 128-bit register, we can subdivide to 32-bits 10 ways:
 *
 * vec4: xyzw
 * vec3: xyz, yzw
 * vec2: xy, yz, zw
 * vec1: x, y, z, w
 *
 * Each 64-bit half could similarly be subdivided into 16-bit pieces
 * (TODO: half-float RA; note we do not support fp16 yet)
 */
+
#define WORK_STRIDE 10

/* Writemask for each of the WORK_STRIDE 32-bit subdivisions of a
 * 128-bit register, ordered vec4, then vec3 (xyz, yzw), vec2 (xy, yz,
 * zw), vec1 (x, y, z, w). Every mask is a contiguous run of bits,
 * which compose_writemask relies on. Declared const: this is a pure
 * lookup table and must never be written. */
static const unsigned reg_type_to_mask[WORK_STRIDE] = {
        0xF,                                    /* xyzw */
        0x7, 0x7 << 1,                          /* xyz, yzw */
        0x3, 0x3 << 1, 0x3 << 2,                /* xy, yz, zw */
        0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3       /* x, y, z, w */
};
+
+static unsigned reg_type_to_swizzle[WORK_STRIDE] = {
+ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+
+ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_Y, COMPONENT_Z, COMPONENT_W, COMPONENT_W),
+
+ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_Y, COMPONENT_Z, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_Z, COMPONENT_W, COMPONENT_Z, COMPONENT_W),
+
+ SWIZZLE(COMPONENT_X, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_Y, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_Z, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+ SWIZZLE(COMPONENT_W, COMPONENT_Y, COMPONENT_Z, COMPONENT_W),
+};
+
/* A concrete hardware register assignment: the register index, the
 * lanes of it that are live (mask), and how components are laid out
 * within those lanes (swizzle). */

struct phys_reg {
        unsigned reg;
        unsigned mask;
        unsigned swizzle;
};

/* Compose an instruction's writemask with the mask of the physical
 * register it was assigned, producing the mask to hand the hardware. */

static unsigned
compose_writemask(unsigned mask, struct phys_reg reg)
{
        /* reg.mask is a contiguous run of bits, so its lowest set bit
         * gives the lane offset of the allocation. Sliding the
         * instruction mask up to that offset and intersecting with the
         * register mask is equivalent to the shift-down / AND /
         * shift-up formulation. */

        unsigned first_lane = __builtin_ctz(reg.mask);

        return (mask << first_lane) & reg.mask;
}
+
/* Compose an instruction's source swizzle with the swizzle of its
 * assigned physical register (via pan_compose_swizzle -- project
 * helper, presumably standard swizzle-of-swizzle composition; confirm
 * against its definition), then shift the result into the lanes the
 * destination register actually occupies.
 *
 * swizzle: original source swizzle, 2 bits per component.
 * mask: original writemask of the instruction (callers pass full xyzw
 *       for L/S so no shift is applied there -- see comment below).
 * reg: physical register assigned to this source.
 * dst: physical register assigned to the destination.
 *
 * NOTE(review): shift is unsigned, so this assumes
 * __builtin_ctz(dst.mask) >= __builtin_ctz(mask); a lower destination
 * lane would underflow -- TODO confirm callers guarantee this. */
static unsigned
compose_swizzle(unsigned swizzle, unsigned mask,
                struct phys_reg reg, struct phys_reg dst)
{
        unsigned out = pan_compose_swizzle(swizzle, reg.swizzle);

        /* Based on the register mask, we need to adjust over. E.g if we're
         * writing to yz, a base swizzle of xy__ becomes _xy_. Save the
         * original first component (x). But to prevent duplicate shifting
         * (only applies to ALU -- mask param is set to xyzw out on L/S to
         * prevent changes), we have to account for the shift inherent to the
         * original writemask */

        /* rep is the composed swizzle's first component (2-bit field),
         * reused to fill the vacated low lanes below */
        unsigned rep = out & 0x3;
        unsigned shift = __builtin_ctz(dst.mask) - __builtin_ctz(mask);
        unsigned shifted = out << (2*shift);

        /* ..but we fill in the gaps so it appears to replicate */

        for (unsigned s = 0; s < shift; ++s)
                shifted |= rep << (2*s);

        return shifted;
}