+}
+
+static void
+increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
+ struct ureg_src saddr[2], bool right_side, bool transposed,
+ int pos, float size)
+{
+ unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+ unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+
+ /*
+ * daddr[0..1].(start) = saddr[0..1].(start)
+ * daddr[0..1].(tc) = saddr[0..1].(tc)
+ */
+
+ ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
+ ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
+ ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
+ ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
+}
+
+static void
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
+ struct ureg_src sampler, bool resource3d)
+{
+ ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
+ ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
+}
+
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
+{
+ struct ureg_dst tmp;
+
+ tmp = ureg_DECL_temporary(shader);
+
+ /*
+ * tmp.xy = dot4(m[0][0..1], m[1][0..1])
+ * dst = tmp.x + tmp.y
+ */
+ ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
+ ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
+ ureg_ADD(shader, dst,
+ ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+
+ ureg_release_temporary(shader, tmp);
+}
+
+static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+ struct ureg_program *shader;
+ struct ureg_src vpos;
+ struct ureg_src scale;
+ struct ureg_dst t_tex;
+ struct ureg_dst o_vpos, o_addr[2];
+
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return NULL;
+
+ vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+ t_tex = ureg_DECL_temporary(shader);
+
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+ o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+ o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+ /*
+ * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
+ *
+ * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
+ * o_vpos.xy = t_vpos * scale
+ *
+ * o_addr = calc_addr(...)
+ *
+ */
+
+ scale = ureg_imm2f(shader,
+ (float)VL_BLOCK_WIDTH / idct->buffer_width,
+ (float)VL_BLOCK_HEIGHT / idct->buffer_height);
+
+ ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
+ ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+ ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+ calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
+
+ ureg_release_temporary(shader, t_tex);
+
+ ureg_END(shader);
+
+ return ureg_create_shader_and_destroy(shader, idct->pipe);