+void
+vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
+ unsigned first_output, struct ureg_dst tex)
+{
+ struct ureg_src vrect, vpos;
+ struct ureg_src scale;
+ struct ureg_dst t_start;
+ struct ureg_dst o_l_addr[2], o_r_addr[2];
+
+ vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+ vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+ t_start = ureg_DECL_temporary(shader);
+
+ --first_output;
+
+ o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
+ o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
+
+ o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
+ o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
+
+ scale = ureg_imm2f(shader,
+ (float)VL_BLOCK_WIDTH / idct->buffer_width,
+ (float)VL_BLOCK_HEIGHT / idct->buffer_height);
+
+ ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
+ ureg_scalar(vrect, TGSI_SWIZZLE_X),
+ ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
+ ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
+
+ calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
+ calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+
+ ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
+ ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
+}
+
+void
+vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
+ unsigned first_input, struct ureg_dst fragment)
+{
+ struct ureg_src l_addr[2], r_addr[2];
+
+ struct ureg_dst l[2], r[2];
+
+ --first_input;
+
+ l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+ l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+ r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+ r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+ l[0] = ureg_DECL_temporary(shader);
+ l[1] = ureg_DECL_temporary(shader);
+ r[0] = ureg_DECL_temporary(shader);
+ r[1] = ureg_DECL_temporary(shader);
+
+ fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+ fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
+
+ matrix_mul(shader, fragment, l, r);
+
+ ureg_release_temporary(shader, l[0]);
+ ureg_release_temporary(shader, l[1]);
+ ureg_release_temporary(shader, r[0]);
+ ureg_release_temporary(shader, r[1]);
+}
+