X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fvl%2Fvl_idct.c;h=79adb045dad4ed6cf21dee3447c28c72e8a047b3;hb=a3ed98f7aa85636579a5696bf036ec13e5c9104a;hp=d91963ada02ba4413b94761775daf17599d99e98;hpb=7408a6ab89e0bc87209b50334604fae93277fdc6;p=mesa.git diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index d91963ada02..79adb045dad 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -25,207 +25,292 @@ * **************************************************************************/ -#include "vl_idct.h" -#include "util/u_draw.h" #include -#include -#include -#include -#include -#include -#include -#include "vl_types.h" -#define BLOCK_WIDTH 8 -#define BLOCK_HEIGHT 8 +#include "pipe/p_context.h" +#include "pipe/p_screen.h" -#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) +#include "util/u_draw.h" +#include "util/u_sampler.h" +#include "util/u_memory.h" -#define STAGE1_SCALE 4.0f -#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE) +#include "tgsi/tgsi_ureg.h" -struct vertex_shader_consts +#include "vl_defines.h" +#include "vl_types.h" +#include "vl_vertex_buffers.h" +#include "vl_idct.h" + +enum VS_OUTPUT { - struct vertex4f norm; + VS_O_VPOS = 0, + VS_O_L_ADDR0 = 0, + VS_O_L_ADDR1, + VS_O_R_ADDR0, + VS_O_R_ADDR1 }; -enum VS_INPUT +/** + * The DCT matrix stored as hex representation of floats. Equal to the following equation: + * for (i = 0; i < 8; ++i) + * for (j = 0; j < 8; ++j) + * if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f); + * else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f)); + */ +static const uint32_t const_matrix[8][8] = { + { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 }, + { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf }, + { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f }, + { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 }, + { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 }, + { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 }, + { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 }, + { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 }, +}; + +static void +calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], + struct ureg_src tc, struct ureg_src start, bool right_side, + bool transposed, float size) { - VS_I_RECT, - VS_I_VPOS, + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; - NUM_VS_INPUTS -}; + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; + unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; -enum VS_OUTPUT + /* + * addr[0..1].(start) = right_side ? start.x : tc.x + * addr[0..1].(tc) = right_side ? tc.y : start.y + * addr[0..1].z = tc.z + * addr[1].(start) += 1.0f / scale + */ + ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); + ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); + + ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); + ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); +} + +static void +increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2], + struct ureg_src saddr[2], bool right_side, bool transposed, + int pos, float size) { - VS_O_VPOS, - VS_O_BLOCK, - VS_O_TEX, - VS_O_START -}; + unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; + unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; -static const float const_matrix[8][8] = { - { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f }, - { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f }, - { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f }, - { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f }, - { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f }, - { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f }, - { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f }, - { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f } -}; + /* + * daddr[0..1].(start) = saddr[0..1].(start) + * daddr[0..1].(tc) = saddr[0..1].(tc) + */ -/* vertices for a quad covering a block */ -static const struct vertex2f const_quad[4] = { - {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} -}; + ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); + ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); + ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); + ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); +} + +static void +fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], + struct ureg_src sampler, bool resource3d) +{ + ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler); + ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler); +} + +static void +matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) +{ + struct ureg_dst tmp; + + tmp = ureg_DECL_temporary(shader); + + /* + * tmp.xy = dot4(m[0][0..1], m[1][0..1]) + * dst = tmp.x + tmp.y + */ + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); + ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); + ureg_ADD(shader, dst, + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); + + ureg_release_temporary(shader, tmp); +} static void * -create_vert_shader(struct vl_idct *idct) +create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; + struct ureg_src vpos; struct ureg_src scale; - struct ureg_src vrect, vpos; - struct ureg_dst t_vpos; - struct ureg_dst o_vpos, o_block, o_tex, o_start; + struct ureg_dst t_tex; + struct ureg_dst o_vpos, o_addr[2]; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; - scale = ureg_imm2f(shader, - (float)BLOCK_WIDTH / idct->destination->width0, - (float)BLOCK_HEIGHT / idct->destination->height0); - - t_vpos = ureg_DECL_temporary(shader); - - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + t_tex = ureg_DECL_temporary(shader); + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK); - o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX); - o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START); + + o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); + o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); /* - * t_vpos = vpos + vrect + * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) + * + * t_vpos = vpos + 7 / VL_BLOCK_WIDTH * o_vpos.xy = t_vpos * scale - * o_vpos.zw = vpos * - * o_block = vrect - * o_tex = t_pos - * o_start = vpos * scale + * o_addr = calc_addr(...) * */ - ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); - ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale); - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); - ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect); - ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale); + scale = ureg_imm2f(shader, + (float)VL_BLOCK_WIDTH / idct->buffer_width, + (float)VL_BLOCK_HEIGHT / idct->buffer_height); + + ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); - ureg_release_temporary(shader, t_vpos); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); + calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); + + ureg_release_temporary(shader, t_tex); ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void -matrix_mul(struct ureg_program *shader, struct ureg_dst dst, - struct ureg_src tc[2], struct ureg_src sampler[2], - struct ureg_src start[2], struct ureg_src step[2], - bool fetch4[2], float scale) +static void * +create_mismatch_frag_shader(struct vl_idct *idct) { - struct ureg_dst t_tc[2], m[2][2], tmp[2]; - unsigned side, i, j; - - for(i = 0; i < 2; ++i) { - t_tc[i] = ureg_DECL_temporary(shader); - for(j = 0; j < 2; ++j) - m[i][j] = ureg_DECL_temporary(shader); - tmp[i] = ureg_DECL_temporary(shader); - } + struct ureg_program *shader; - /* - * m[0..1][0] = ? - * tmp[0..1] = dot4(m[0..1][0], m[0..1][1]) - * fragment = tmp[0] + tmp[1] - */ - ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_scalar(start[0], TGSI_SWIZZLE_X)); - ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), ureg_scalar(tc[0], TGSI_SWIZZLE_Y)); - - if(fetch4[1]) { - ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(start[1], TGSI_SWIZZLE_Y)); - ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(tc[1], TGSI_SWIZZLE_X)); - } else { - ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(tc[1], TGSI_SWIZZLE_X)); - ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(start[1], TGSI_SWIZZLE_Y)); + struct ureg_src addr[2]; + + struct ureg_dst m[8][2]; + struct ureg_dst fragment; + + unsigned i; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + for (i = 0; i < 8; ++i) { + m[i][0] = ureg_DECL_temporary(shader); + m[i][1] = ureg_DECL_temporary(shader); } - for(side = 0; side < 2; ++side) { - for(i = 0; i < 2; ++i) { - if(fetch4[side]) { - ureg_TEX(shader, m[i][side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]); - ureg_MOV(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), step[side]); + for (i = 0; i < 8; ++i) { + increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); + } - } else for(j = 0; j < 4; ++j) { - /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */ - ureg_TEX(shader, tmp[side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]); - ureg_MOV(shader, ureg_writemask(m[i][side], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[side]), TGSI_SWIZZLE_X)); + for (i = 0; i < 8; ++i) { + struct ureg_src s_addr[2]; + s_addr[0] = ureg_src(m[i][0]); + s_addr[1] = ureg_src(m[i][1]); + fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); + } - ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X << side), ureg_src(t_tc[side]), step[side]); - } - } + for (i = 1; i < 8; ++i) { + ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); + ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); } - ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1])); - ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1])); - ureg_ADD(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1])); - ureg_MUL(shader, dst, ureg_src(tmp[0]), ureg_imm1f(shader, scale)); + ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); + ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); + + ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); + ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); + ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); + + ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), + ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); + ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), + ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); + + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); + ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); - for(i = 0; i < 2; ++i) { - ureg_release_temporary(shader, t_tc[i]); - for(j = 0; j < 2; ++j) - ureg_release_temporary(shader, m[i][j]); - ureg_release_temporary(shader, tmp[i]); + for (i = 0; i < 8; ++i) { + ureg_release_temporary(shader, m[i][0]); + ureg_release_temporary(shader, m[i][1]); } + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, idct->pipe); } static void * -create_transpose_frag_shader(struct vl_idct *idct) +create_stage1_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src tc[2], sampler[2]; - struct ureg_src start[2], step[2]; - struct ureg_dst fragment; - bool fetch4[2]; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_tex, t_start; + struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; - tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); - tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR); + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); - start[0] = ureg_imm1f(shader, 0.0f); - start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT); + t_tex = ureg_DECL_temporary(shader); + t_start = ureg_DECL_temporary(shader); - step[0] = ureg_imm1f(shader, 4.0f / BLOCK_HEIGHT); - step[1] = ureg_imm1f(shader, 1.0f / idct->destination->height0); + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - sampler[0] = ureg_DECL_sampler(shader, 0); - sampler[1] = ureg_DECL_sampler(shader, 1); + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); - fetch4[0] = true; - fetch4[1] = false; + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + /* + * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) + * + * t_vpos = vpos + vrect + * o_vpos.xy = t_vpos * scale + * o_vpos.zw = vpos + * + * o_l_addr = calc_addr(...) + * o_r_addr = calc_addr(...) + * + */ + + scale = ureg_imm2f(shader, + (float)VL_BLOCK_WIDTH / idct->buffer_width, + (float)VL_BLOCK_HEIGHT / idct->buffer_height); + + ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE1_SCALE); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); + + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4); + + ureg_release_temporary(shader, t_tex); + ureg_release_temporary(shader, t_start); ureg_END(shader); @@ -233,304 +318,370 @@ create_transpose_frag_shader(struct vl_idct *idct) } static void * -create_matrix_frag_shader(struct vl_idct *idct) +create_stage1_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; - struct ureg_src tc[2], sampler[2]; - struct ureg_src start[2], step[2]; - struct ureg_dst fragment; - bool fetch4[2]; + + struct ureg_src l_addr[2], r_addr[2]; + + struct ureg_dst l[4][2], r[2]; + struct ureg_dst *fragment; + + int i, j; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; - tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR); - tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); + fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst)); - start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT); - start[1] = ureg_imm1f(shader, 0.0f); + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); - step[0] = ureg_imm1f(shader, 1.0f / idct->destination->width0); - step[1] = ureg_imm1f(shader, 4.0f / BLOCK_WIDTH); + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); - sampler[0] = ureg_DECL_sampler(shader, 1); - sampler[1] = ureg_DECL_sampler(shader, 0); + for (i = 0; i < idct->nr_of_render_targets; ++i) + fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); - fetch4[0] = false; - fetch4[1] = true; + for (i = 0; i < 4; ++i) { + l[i][0] = ureg_DECL_temporary(shader); + l[i][1] = ureg_DECL_temporary(shader); + } - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); - matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE2_SCALE); + for (i = 0; i < 4; ++i) { + increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); + } - ureg_END(shader); + for (i = 0; i < 4; ++i) { + struct ureg_src s_addr[2]; + s_addr[0] = ureg_src(l[i][0]); + s_addr[1] = ureg_src(l[i][1]); + fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); + } - return ureg_create_shader_and_destroy(shader, idct->pipe); -} + for (i = 0; i < idct->nr_of_render_targets; ++i) { + struct ureg_src s_addr[2]; -static void * -create_empty_block_frag_shader(struct vl_idct *idct) -{ - struct ureg_program *shader; - struct ureg_dst fragment; + increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT); - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); - if (!shader) - return NULL; + s_addr[0] = ureg_src(r[0]); + s_addr[1] = ureg_src(r[1]); + fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + for (j = 0; j < 4; ++j) { + matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); + } + } - ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f)); + for (i = 0; i < 4; ++i) { + ureg_release_temporary(shader, l[i][0]); + ureg_release_temporary(shader, l[i][1]); + } + ureg_release_temporary(shader, r[0]); + ureg_release_temporary(shader, r[1]); ureg_END(shader); + FREE(fragment); + return ureg_create_shader_and_destroy(shader, idct->pipe); } -static void -xfer_buffers_map(struct vl_idct *idct) +void +vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_output, struct ureg_dst tex) { - struct pipe_box rect = - { - 0, 0, 0, - idct->destination->width0, - idct->destination->height0, - 1 - }; + struct ureg_src vrect, vpos; + struct ureg_src scale; + struct ureg_dst t_start; + struct ureg_dst o_l_addr[2], o_r_addr[2]; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_start = ureg_DECL_temporary(shader); + + --first_output; + + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); + o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); + + o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); + o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); + + scale = ureg_imm2f(shader, + (float)VL_BLOCK_WIDTH / idct->buffer_width, + (float)VL_BLOCK_HEIGHT / idct->buffer_height); + + ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets)); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + + calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); - idct->tex_transfer = idct->pipe->get_transfer - ( - idct->pipe, idct->textures.individual.source, - u_subresource(0, 0), - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - - idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer); - - idct->vectors = pipe_buffer_map - ( - idct->pipe, - idct->vertex_bufs.individual.pos.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &idct->vec_transfer - ); + ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); + ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); } -static void -xfer_buffers_unmap(struct vl_idct *idct) +void +vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, + unsigned first_input, struct ureg_dst fragment) { - pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer); + struct ureg_src l_addr[2], r_addr[2]; + + struct ureg_dst l[2], r[2]; + + --first_input; + + l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); + l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); + + r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); + r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); + + l[0] = ureg_DECL_temporary(shader); + l[1] = ureg_DECL_temporary(shader); + r[0] = ureg_DECL_temporary(shader); + r[1] = ureg_DECL_temporary(shader); + + fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false); + fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true); - idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer); - idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer); + matrix_mul(shader, fragment, l, r); + + ureg_release_temporary(shader, l[0]); + ureg_release_temporary(shader, l[1]); + ureg_release_temporary(shader, r[0]); + ureg_release_temporary(shader, r[1]); } static bool init_shaders(struct vl_idct *idct) { - idct->vs = create_vert_shader(idct); - idct->transpose_fs = create_transpose_frag_shader(idct); - idct->matrix_fs = create_matrix_frag_shader(idct); - idct->eb_fs = create_empty_block_frag_shader(idct); - - return - idct->vs != NULL && - idct->transpose_fs != NULL && - idct->matrix_fs != NULL && - idct->eb_fs != NULL; + idct->vs_mismatch = create_mismatch_vert_shader(idct); + if (!idct->vs_mismatch) + goto error_vs_mismatch; + + idct->fs_mismatch = create_mismatch_frag_shader(idct); + if (!idct->fs_mismatch) + goto error_fs_mismatch; + + idct->vs = create_stage1_vert_shader(idct); + if (!idct->vs) + goto error_vs; + + idct->fs = create_stage1_frag_shader(idct); + if (!idct->fs) + goto error_fs; + + return true; + +error_fs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs); + +error_vs: + idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); + +error_fs_mismatch: + idct->pipe->delete_vs_state(idct->pipe, idct->fs); + +error_vs_mismatch: + return false; } static void cleanup_shaders(struct vl_idct *idct) { + idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); + idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); idct->pipe->delete_vs_state(idct->pipe, idct->vs); - idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs); - idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs); - idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs); + idct->pipe->delete_fs_state(idct->pipe, idct->fs); } static bool -init_buffers(struct vl_idct *idct) +init_state(struct vl_idct *idct) { - struct pipe_resource template; - struct pipe_sampler_view sampler_view; - struct pipe_vertex_element vertex_elems[2]; + struct pipe_blend_state blend; + struct pipe_rasterizer_state rs_state; + struct pipe_sampler_state sampler; unsigned i; - idct->max_blocks = - align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH * - align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT * - idct->destination->depth0; - - memset(&template, 0, sizeof(struct pipe_resource)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R32G32B32A32_FLOAT; - template.last_level = 0; - template.width0 = 2; - template.height0 = 8; - template.depth0 = 1; - template.usage = PIPE_USAGE_IMMUTABLE; - template.bind = PIPE_BIND_SAMPLER_VIEW; - template.flags = 0; - - template.format = idct->destination->format; - template.width0 = idct->destination->width0; - template.height0 = idct->destination->height0; - template.depth0 = idct->destination->depth0; - template.usage = PIPE_USAGE_DYNAMIC; - idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); - - template.usage = PIPE_USAGE_STATIC; - idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template); - - for (i = 0; i < 4; ++i) { - if(idct->textures.all[i] == NULL) - return false; /* a texture failed to allocate */ + assert(idct); - u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format); - idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view); + memset(&rs_state, 0, sizeof(rs_state)); + rs_state.point_size = 1; + rs_state.half_pixel_center = true; + rs_state.bottom_edge_rule = true; + rs_state.depth_clip = 1; + idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); + if (!idct->rs_state) + goto error_rs_state; + + memset(&blend, 0, sizeof blend); + + blend.independent_blend_enable = 0; + blend.rt[0].blend_enable = 0; + blend.rt[0].rgb_func = PIPE_BLEND_ADD; + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_func = PIPE_BLEND_ADD; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.rt[0].colormask = PIPE_MASK_RGBA; + blend.dither = 0; + idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend); + if (!idct->blend) + goto error_blend; + + for (i = 0; i < 2; ++i) { + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); + if (!idct->samplers[i]) + goto error_samplers; } - idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f); - idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1; - idct->vertex_bufs.individual.quad.buffer_offset = 0; - idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create - ( - idct->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(struct vertex2f) * 4 * idct->max_blocks - ); - - if(idct->vertex_bufs.individual.quad.buffer == NULL) - return false; - - idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f); - idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1; - idct->vertex_bufs.individual.pos.buffer_offset = 0; - idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create - ( - idct->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(struct vertex2f) * 4 * idct->max_blocks - ); - - if(idct->vertex_bufs.individual.pos.buffer == NULL) - return false; + return true; - /* Rect element */ - vertex_elems[0].src_offset = 0; - vertex_elems[0].instance_divisor = 0; - vertex_elems[0].vertex_buffer_index = 0; - vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; +error_samplers: + for (i = 0; i < 2; ++i) + if (idct->samplers[i]) + idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); - /* Pos element */ - vertex_elems[1].src_offset = 0; - vertex_elems[1].instance_divisor = 0; - vertex_elems[1].vertex_buffer_index = 1; - vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); - idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems); +error_blend: + idct->pipe->delete_blend_state(idct->pipe, idct->blend); - return true; +error_rs_state: + return false; } static void -cleanup_buffers(struct vl_idct *idct) +cleanup_state(struct vl_idct *idct) { unsigned i; - assert(idct); + for (i = 0; i < 2; ++i) + idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); - for (i = 0; i < 4; ++i) { - pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL); - pipe_resource_reference(&idct->textures.all[i], NULL); - } + idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->delete_blend_state(idct->pipe, idct->blend); +} + +static bool +init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + struct pipe_resource *tex; + struct pipe_surface surf_templ; + + assert(idct && buffer); + + tex = buffer->sampler_views.individual.source->texture; + + buffer->fb_state_mismatch.width = tex->width0; + buffer->fb_state_mismatch.height = tex->height0; + buffer->fb_state_mismatch.nr_cbufs = 1; + + memset(&surf_templ, 0, sizeof(surf_templ)); + surf_templ.format = tex->format; + surf_templ.u.tex.first_layer = 0; + surf_templ.u.tex.last_layer = 0; + buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ); + + buffer->viewport_mismatch.scale[0] = tex->width0; + buffer->viewport_mismatch.scale[1] = tex->height0; + buffer->viewport_mismatch.scale[2] = 1; + buffer->viewport_mismatch.scale[3] = 1; - idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL); - pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL); + return true; } static void -init_constants(struct vl_idct *idct) +cleanup_source(struct vl_idct_buffer *buffer) { - struct pipe_transfer *buf_transfer; - struct vertex2f *v; + assert(buffer); - unsigned i; + pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); - /* quad vectors */ - v = pipe_buffer_map - ( - idct->pipe, - idct->vertex_bufs.individual.quad.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer - ); - for ( i = 0; i < idct->max_blocks; ++i) - memcpy(v + i * 4, &const_quad, sizeof(const_quad)); - pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer); + pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); } -static void -init_state(struct vl_idct *idct) +static bool +init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) { - struct pipe_sampler_state sampler; + struct pipe_resource *tex; + struct pipe_surface surf_templ; unsigned i; - idct->num_blocks = 0; - idct->num_empty_blocks = 0; + assert(idct && buffer); - idct->viewport.scale[0] = idct->destination->width0; - idct->viewport.scale[1] = idct->destination->height0; - idct->viewport.scale[2] = 1; - idct->viewport.scale[3] = 1; - idct->viewport.translate[0] = 0; - idct->viewport.translate[1] = 0; - idct->viewport.translate[2] = 0; - idct->viewport.translate[3] = 0; + tex = buffer->sampler_views.individual.intermediate->texture; - idct->fb_state.width = idct->destination->width0; - idct->fb_state.height = idct->destination->height0; - idct->fb_state.nr_cbufs = 1; - idct->fb_state.zsbuf = NULL; + buffer->fb_state.width = tex->width0; + buffer->fb_state.height = tex->height0; + buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; + for(i = 0; i < idct->nr_of_render_targets; ++i) { + memset(&surf_templ, 0, sizeof(surf_templ)); + surf_templ.format = tex->format; + surf_templ.u.tex.first_layer = i; + surf_templ.u.tex.last_layer = i; + buffer->fb_state.cbufs[i] = idct->pipe->create_surface( + idct->pipe, tex, &surf_templ); - for (i = 0; i < 4; ++i) { - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.shadow_ambient = ; */ - /*sampler.lod_bias = ; */ - sampler.min_lod = 0; - /*sampler.max_lod = ; */ - /*sampler.border_color[0] = ; */ - /*sampler.max_anisotropy = ; */ - idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); + if (!buffer->fb_state.cbufs[i]) + goto error_surfaces; } + + buffer->viewport.scale[0] = tex->width0; + buffer->viewport.scale[1] = tex->height0; + buffer->viewport.scale[2] = 1; + buffer->viewport.scale[3] = 1; + + return true; + +error_surfaces: + for(i = 0; i < idct->nr_of_render_targets; ++i) + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); + + return false; } static void -cleanup_state(struct vl_idct *idct) +cleanup_intermediate(struct vl_idct_buffer *buffer) { unsigned i; - for (i = 0; i < 4; ++i) - idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]); + assert(buffer); + + for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) + pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); + + pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); } -struct pipe_resource * -vl_idct_upload_matrix(struct pipe_context *pipe) +struct pipe_sampler_view * +vl_idct_upload_matrix(struct pipe_context *pipe, float scale) { - struct pipe_resource template, *matrix; + struct pipe_resource tex_templ, *matrix; + struct pipe_sampler_view sv_templ, *sv; struct pipe_transfer *buf_transfer; unsigned i, j, pitch; float *f; @@ -538,189 +689,174 @@ vl_idct_upload_matrix(struct pipe_context *pipe) struct pipe_box rect = { 0, 0, 0, - BLOCK_WIDTH, - BLOCK_HEIGHT, + VL_BLOCK_WIDTH / 4, + VL_BLOCK_HEIGHT, 1 }; - memset(&template, 0, sizeof(struct pipe_resource)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R32G32B32A32_FLOAT; - template.last_level = 0; - template.width0 = 2; - template.height0 = 8; - template.depth0 = 1; - template.usage = PIPE_USAGE_IMMUTABLE; - template.bind = PIPE_BIND_SAMPLER_VIEW; - template.flags = 0; - - matrix = pipe->screen->resource_create(pipe->screen, &template); - - /* matrix */ - buf_transfer = pipe->get_transfer - ( - pipe, matrix, - u_subresource(0, 0), - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format); - - f = pipe->transfer_map(pipe, buf_transfer); - for(i = 0; i < BLOCK_HEIGHT; ++i) - for(j = 0; j < BLOCK_WIDTH; ++j) - f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose + assert(pipe); + + memset(&tex_templ, 0, sizeof(tex_templ)); + tex_templ.target = PIPE_TEXTURE_2D; + tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; + tex_templ.last_level = 0; + tex_templ.width0 = 2; + tex_templ.height0 = 8; + tex_templ.depth0 = 1; + tex_templ.array_size = 1; + tex_templ.usage = PIPE_USAGE_IMMUTABLE; + tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; + tex_templ.flags = 0; + + matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); + if (!matrix) + goto error_matrix; + + f = pipe->transfer_map(pipe, matrix, 0, + PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE, + &rect, &buf_transfer); + if (!f) + goto error_map; + + pitch = buf_transfer->stride / sizeof(float); + + for(i = 0; i < VL_BLOCK_HEIGHT; ++i) + for(j = 0; j < VL_BLOCK_WIDTH; ++j) + // transpose and scale + f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; pipe->transfer_unmap(pipe, buf_transfer); - pipe->transfer_destroy(pipe, buf_transfer); - return matrix; + memset(&sv_templ, 0, sizeof(sv_templ)); + u_sampler_view_default_template(&sv_templ, matrix, matrix->format); + sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); + pipe_resource_reference(&matrix, NULL); + if (!sv) + goto error_map; + + return sv; + +error_map: + pipe_resource_reference(&matrix, NULL); + +error_matrix: + return NULL; } -bool -vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix) +bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned nr_of_render_targets, + struct pipe_sampler_view *matrix, + struct pipe_sampler_view *transpose) { - assert(idct && pipe && dst); + assert(idct && pipe); + assert(matrix && transpose); idct->pipe = pipe; - pipe_resource_reference(&idct->textures.individual.matrix, matrix); - pipe_resource_reference(&idct->textures.individual.transpose, matrix); - pipe_resource_reference(&idct->destination, dst); + idct->buffer_width = buffer_width; + idct->buffer_height = buffer_height; + idct->nr_of_render_targets = nr_of_render_targets; - init_state(idct); + pipe_sampler_view_reference(&idct->matrix, matrix); + pipe_sampler_view_reference(&idct->transpose, transpose); if(!init_shaders(idct)) return false; - if(!init_buffers(idct)) { + if(!init_state(idct)) { cleanup_shaders(idct); return false; } - idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface( - idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET); - - idct->surfaces.destination = idct->pipe->screen->get_tex_surface( - idct->pipe->screen, idct->destination, 0, 0, 0, - PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET); - - init_constants(idct); - xfer_buffers_map(idct); - return true; } void vl_idct_cleanup(struct vl_idct *idct) { - idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination); - idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate); - cleanup_shaders(idct); - cleanup_buffers(idct); - cleanup_state(idct); - pipe_resource_reference(&idct->destination, NULL); + pipe_sampler_view_reference(&idct->matrix, NULL); + pipe_sampler_view_reference(&idct->transpose, NULL); } -void -vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block) +bool +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_sampler_view *source, + struct pipe_sampler_view *intermediate) { - struct vertex2f v, *v_dst; - - unsigned tex_pitch; - short *texels; - - unsigned i; - - assert(idct); - - if(block) { - tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format); - texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; - - for (i = 0; i < BLOCK_HEIGHT; ++i) - memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2); - - /* non empty blocks fills the vector buffer from left to right */ - v_dst = idct->vectors + idct->num_blocks * 4; + assert(buffer && idct); + assert(source && intermediate); - idct->num_blocks++; + memset(buffer, 0, sizeof(struct vl_idct_buffer)); - } else { + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); + pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); + pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); + pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate); - /* while empty blocks fills the vector buffer from right to left */ - v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4; - - idct->num_empty_blocks++; - } + if (!init_source(idct, buffer)) + return false; - v.x = x; - v.y = y; + if (!init_intermediate(idct, buffer)) + return false; - for (i = 0; i < 4; ++i) { - v_dst[i] = v; - } + return true; } void -vl_idct_flush(struct vl_idct *idct) +vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) { - xfer_buffers_unmap(idct); - - if(idct->num_blocks > 0) { + assert(buffer); - /* first stage */ - idct->fb_state.cbufs[0] = idct->surfaces.intermediate; - idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); - idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); + cleanup_source(buffer); + cleanup_intermediate(buffer); - idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); - idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]); - idct->pipe->bind_vs_state(idct->pipe, idct->vs); - idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - - util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); - - /* second stage */ - idct->fb_state.cbufs[0] = idct->surfaces.destination; - idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); - idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); - - idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); - idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]); - idct->pipe->bind_vs_state(idct->pipe, idct->vs); - idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - - util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); - } - - if(idct->num_empty_blocks > 0) { - - /* empty block handling */ - idct->fb_state.cbufs[0] = idct->surfaces.destination; - idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); - idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); - - idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); - idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all); - idct->pipe->bind_vs_state(idct->pipe, idct->vs); - idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs); + pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); + pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); +} - util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, - (idct->max_blocks - idct->num_empty_blocks) * 4, - idct->num_empty_blocks * 4); - } +void +vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) +{ + assert(buffer); + + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_blend_state(idct->pipe, idct->blend); + + idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT, + 0, 2, idct->samplers); + + idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, + buffer->sampler_views.stage[0]); + + /* mismatch control */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch); + idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch); + idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch); + idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances); + + /* first stage */ + idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); + idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->fs); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); +} - idct->num_blocks = 0; - idct->num_empty_blocks = 0; - xfer_buffers_map(idct); +void +vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) +{ + assert(buffer); + + /* second stage */ + idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); + idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT, + 0, 2, idct->samplers); + idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, + 0, 2, buffer->sampler_views.stage[1]); } +