From 3511780a43077d1359bd491eadb4ab9b3b86795a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Tue, 19 Apr 2011 21:06:59 +0200 Subject: [PATCH] [g3dvl] revert commit 310eea52ca1e997295c84163066cc5d0fd4f8cf6 Using a separate vertex buffer for mc and ycbcr handling is still better. --- src/gallium/auxiliary/vl/vl_idct.c | 93 ++--- src/gallium/auxiliary/vl/vl_idct.h | 2 - src/gallium/auxiliary/vl/vl_mc.c | 358 +++++++++---------- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 27 +- src/gallium/auxiliary/vl/vl_vertex_buffers.c | 110 +++--- src/gallium/auxiliary/vl/vl_vertex_buffers.h | 17 +- 6 files changed, 252 insertions(+), 355 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 4a500a7489c..ca3b1cb53ab 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -90,11 +90,10 @@ static void * create_vert_shader(struct vl_idct *idct, bool matrix_stage) { struct ureg_program *shader; - struct ureg_src vrect, vpos, vblock, eb; - struct ureg_src scale, blocks_xy; + struct ureg_src vrect, vpos; + struct ureg_src scale; struct ureg_dst t_tex, t_start; struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; - unsigned label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) @@ -105,12 +104,9 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); - vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - eb = ureg_DECL_vs_input(shader, VS_I_EB); - o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); @@ -119,75 +115,39 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) /* * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) - * blocks_xy = 
(blocks_x, blocks_y) - * - * if eb.(vblock.y, vblock.x) - * o_vpos.xy = -1 - * else - * t_tex = vpos * blocks_xy + vblock - * t_start = t_tex * scale - * t_tex = t_tex + vrect - * o_vpos.xy = t_tex * scale * - * o_l_addr = calc_addr(...) - * o_r_addr = calc_addr(...) - * endif + * t_vpos = vpos + vrect + * o_vpos.xy = t_vpos * scale * o_vpos.zw = vpos * + * o_l_addr = calc_addr(...) + * o_r_addr = calc_addr(...) + * */ scale = ureg_imm2f(shader, (float)BLOCK_WIDTH / idct->buffer_width, (float)BLOCK_HEIGHT / idct->buffer_height); - blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y); + ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); - if (idct->blocks_x > 1 || idct->blocks_y > 1) { - ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), - ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)), - ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W), - ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y)); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); - ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), - ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)), - ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X)); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); - eb = ureg_src(t_tex); + if(matrix_stage) { + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); + } else { + calc_addr(shader, o_l_addr, 
vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); } - ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label); - - ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f)); - - ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); - ureg_ELSE(shader, &label); - - ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock); - ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - - ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect); - - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), - ureg_scalar(vrect, TGSI_SWIZZLE_X), - ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); - - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); - - if(matrix_stage) { - calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); - calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); - } else { - calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); - calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); - } - - ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); - ureg_ENDIF(shader); - - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); - ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_start); @@ -607,7 +567,6 @@ error_matrix: bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, - unsigned blocks_x, unsigned blocks_y, unsigned nr_of_render_targets, struct pipe_sampler_view *matrix, struct pipe_sampler_view *transpose) @@ -617,8 +576,6 @@ bool 
vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, idct->pipe = pipe; idct->buffer_width = buffer_width; idct->buffer_height = buffer_height; - idct->blocks_x = blocks_x; - idct->blocks_y = blocks_y; idct->nr_of_render_targets = nr_of_render_targets; pipe_sampler_view_reference(&idct->matrix, matrix); @@ -706,14 +663,10 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer) void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) { - unsigned num_verts; - assert(idct); assert(buffer); if(num_instances > 0) { - num_verts = idct->blocks_x * idct->blocks_y * 4; - idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); idct->pipe->bind_blend_state(idct->pipe, idct->blend); idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers); @@ -724,7 +677,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_ idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]); idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); /* second stage */ idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); @@ -732,6 +685,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_ idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]); idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); } } diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h index 7f00e3e28b4..96933b9d889 
100644 --- a/src/gallium/auxiliary/vl/vl_idct.h +++ b/src/gallium/auxiliary/vl/vl_idct.h @@ -39,7 +39,6 @@ struct vl_idct unsigned buffer_width; unsigned buffer_height; - unsigned blocks_x, blocks_y; unsigned nr_of_render_targets; void *rs_state; @@ -77,7 +76,6 @@ struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float /* init an idct instance */ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, - unsigned blocks_x, unsigned blocks_y, unsigned nr_of_render_targets, struct pipe_sampler_view *matrix, struct pipe_sampler_view *transpose); diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c index 2624d0502c1..9785327cdbe 100644 --- a/src/gallium/auxiliary/vl/vl_mc.c +++ b/src/gallium/auxiliary/vl/vl_mc.c @@ -41,15 +41,13 @@ enum VS_OUTPUT { VS_O_VPOS, - VS_O_LINE, VS_O_VTOP, VS_O_VBOTTOM }; static struct ureg_dst -calc_position(struct vl_mc *r, struct ureg_program *shader) +calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale) { - struct ureg_src block_scale; struct ureg_src vrect, vpos; struct ureg_dst t_vpos; struct ureg_dst o_vpos; @@ -68,111 +66,32 @@ calc_position(struct vl_mc *r, struct ureg_program *shader) * o_vpos.xy = t_vpos * o_vpos.zw = vpos */ - block_scale = ureg_imm2f(shader, - (float)MACROBLOCK_WIDTH / r->buffer_width, - (float)MACROBLOCK_HEIGHT / r->buffer_height); - ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); return t_vpos; } -static void * -create_ycbcr_vert_shader(struct vl_mc *r) +static struct ureg_dst +calc_line(struct ureg_program *shader) { - 
struct ureg_program *shader; - struct ureg_src block_scale; - struct ureg_src vrect, vpos, eb, flags; - struct ureg_dst t_vpos, t_vtex; - struct ureg_dst o_line, o_vtex[2]; - unsigned label; - - shader = ureg_create(TGSI_PROCESSOR_VERTEX); - if (!shader) - return NULL; - - vrect = ureg_DECL_vs_input(shader, VS_I_RECT); - vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); - eb = ureg_DECL_vs_input(shader, VS_I_EB); - flags = ureg_DECL_vs_input(shader, VS_I_FLAGS); + struct ureg_dst tmp; + struct ureg_src pos; - t_vpos = calc_position(r, shader); - t_vtex = ureg_DECL_temporary(shader); + tmp = ureg_DECL_temporary(shader); - o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE); - o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); - o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); + pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR); /* - * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height) - * - * o_line.x = interlaced - * o_line.y = vrect - * - * o_vtex[0].z = vrect.x ? eb.y : eb.x - * o_vtex[1].z = vrect.x ? eb.w : eb.z - * - * if(interlaced) { - * t_vtex.x = vrect.x - * t_vtex.y = vrect.y * 0.5 - * t_vtex += vpos - * - * o_vtex[0].xy = t_vtex * block_scale - * - * t_vtex.y += 0.5 - * o_vtex[1].xy = t_vtex * block_scale - * } else { - * o_vtex[0..1].xy = t_vpos - * } - * o_vtex[2].xy = t_vpos - * + * tmp.y = fraction(pos.y / 2) >= 0.5 ? 
1 : 0 */ - block_scale = ureg_imm2f(shader, - (float)MACROBLOCK_WIDTH / r->buffer_width, - (float)MACROBLOCK_HEIGHT / r->buffer_height); - - ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f)); - ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect); - - ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z), - ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), - ureg_scalar(eb, TGSI_SWIZZLE_Y), - ureg_scalar(eb, TGSI_SWIZZLE_X)); - - ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z), - ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), - ureg_scalar(eb, TGSI_SWIZZLE_W), - ureg_scalar(eb, TGSI_SWIZZLE_Z)); - - if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO - ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label); - - ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect); - ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f)); - ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex)); - ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale); - ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f)); - ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale); - - ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), - ureg_scalar(vrect, TGSI_SWIZZLE_Y), - ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2)); - - ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); - ureg_ENDIF(shader); - } - - ureg_release_temporary(shader, t_vtex); - ureg_release_temporary(shader, t_vpos); - - ureg_END(shader); + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f)); + ureg_FRC(shader, 
ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp)); + ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); - return ureg_create_shader_and_destroy(shader, r->pipe); + return tmp; } static void * @@ -182,7 +101,7 @@ create_ref_vert_shader(struct vl_mc *r) struct ureg_src mv_scale; struct ureg_src vrect, vmv[2]; struct ureg_dst t_vpos; - struct ureg_dst o_vpos, o_line, o_vmv[2]; + struct ureg_dst o_vpos, o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); @@ -190,31 +109,29 @@ create_ref_vert_shader(struct vl_mc *r) return NULL; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); - ureg_DECL_vs_input(shader, VS_I_EB); - ureg_DECL_vs_input(shader, VS_I_FLAGS); vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); - t_vpos = calc_position(r, shader); + t_vpos = calc_position(r, shader, ureg_imm2f(shader, + (float)MACROBLOCK_WIDTH / r->buffer_width, + (float)MACROBLOCK_HEIGHT / r->buffer_height) + ); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); /* - * mv_scale = 0.5 / (dst.width, dst.height); + * mv_scale.xy = 0.5 / (dst.width, dst.height); + * mv_scale.z = 1.0f / 4.0f + * mv_scale.w = 1.0f / 255.0f * * // Apply motion vectors - * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale - * - * o_line.y = vrect + * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos + * o_vmv[0..1].zw = vmv[0..1] * mv_scale * */ - ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), - vrect, ureg_imm1f(shader, r->macroblock_size / 2)); - mv_scale = ureg_imm4f(shader, 0.5f / r->buffer_width, 0.5f / r->buffer_height, @@ -233,90 +150,6 @@ create_ref_vert_shader(struct vl_mc *r) return ureg_create_shader_and_destroy(shader, r->pipe); } -static struct ureg_dst 
-calc_field(struct ureg_program *shader) -{ - struct ureg_dst tmp; - struct ureg_src line; - - tmp = ureg_DECL_temporary(shader); - - line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR); - - /* - * line.x is flag for intra frames - * line.y going from 0 to 1 if not interlaced - * line.y going from 0 to 8 in steps of 0.5 if interlaced - * - * tmp.xy = fraction(line) - * tmp.xy = tmp.xy >= 0.5 ? 1 : 0 - */ - ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line); - ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line); - ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); - - return tmp; -} - -static void * -create_ycbcr_frag_shader(struct vl_mc *r, float scale) -{ - struct ureg_program *shader; - struct ureg_src tc[2], sampler; - struct ureg_dst texel, t_tc, field; - struct ureg_dst fragment; - unsigned label; - - shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); - if (!shader) - return NULL; - - tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); - tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); - - sampler = ureg_DECL_sampler(shader, 0); - - t_tc = ureg_DECL_temporary(shader); - texel = ureg_DECL_temporary(shader); - - fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - - field = calc_field(shader); - - /* - * texel.y = tex(field.y ? 
tc[1] : tc[0], sampler[0]) - * texel.cb = tex(tc[2], sampler[1]) - * texel.cr = tex(tc[2], sampler[2]) - */ - - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ), - ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), - tc[1], tc[0]); - - ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f)); - - ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f)); - ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label); - - ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler); - - if (scale != 1.0f) - ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), - ureg_src(texel), ureg_imm1f(shader, scale), - ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)); - else - ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), - ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)); - - ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); - ureg_ENDIF(shader); - - ureg_release_temporary(shader, t_tc); - ureg_release_temporary(shader, texel); - - return ureg_create_shader_and_destroy(shader, r->pipe); -} - static void * create_ref_frag_shader(struct vl_mc *r) { @@ -342,7 +175,7 @@ create_ref_frag_shader(struct vl_mc *r) fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); - field = calc_field(shader); + field = calc_line(shader); /* * ref = field.z ? 
tc[1] : tc[0] @@ -386,6 +219,149 @@ create_ref_frag_shader(struct vl_mc *r) return ureg_create_shader_and_destroy(shader, r->pipe); } +static void * +create_ycbcr_vert_shader(struct vl_mc *r) +{ + struct ureg_program *shader; + + struct ureg_src vrect, vpos; + struct ureg_dst t_vpos, t_vtex; + struct ureg_dst o_vpos, o_vtex; + + struct vertex2f scale = { + (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size, + (float)BLOCK_HEIGHT / r->buffer_height * MACROBLOCK_HEIGHT / r->macroblock_size + }; + + unsigned label; + + shader = ureg_create(TGSI_PROCESSOR_VERTEX); + if (!shader) + return NULL; + + vrect = ureg_DECL_vs_input(shader, VS_I_RECT); + vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + + t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y)); + t_vtex = ureg_DECL_temporary(shader); + + o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); + + /* + * o_vtex.xy = t_vpos + * o_vtex.z = intra * 0.5 + * + * if(interlaced) { + * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } + * t_vtex.z = vpos.y % 2 + * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y + * o_vpos.y = t_vtex.y + t_vpos.y + * + * o_vtex.w = t_vtex.z ? 
0 : 1 + * } + * + */ + ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z), + ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); + ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); + + if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO + ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); + + ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)), + ureg_imm2f(shader, 0.0f, scale.y), + ureg_imm2f(shader, -scale.y, 0.0f)); + ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), + ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f)); + + ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex)); + + ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), + ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), + ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X), + ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y)); + ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), + ureg_src(t_vpos), ureg_src(t_vtex)); + + ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), + ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), + ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + } + + ureg_release_temporary(shader, t_vtex); + ureg_release_temporary(shader, t_vpos); + + ureg_END(shader); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + +static void * +create_ycbcr_frag_shader(struct vl_mc *r, float scale) +{ + struct ureg_program *shader; + struct ureg_src tc, sampler; + struct ureg_dst tmp; + struct ureg_dst fragment; + unsigned label; + + shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!shader) + return NULL; + + tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); + + 
sampler = ureg_DECL_sampler(shader, 0); + + fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); + + tmp = calc_line(shader); + + /* + * if (field == tc.w) + * kill(); + * else { + * fragment.xyz = tex(tc, sampler) * scale + tc.z + * fragment.w = 1.0f + * } + */ + + ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), + ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp)); + + ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label); + + ureg_KILP(shader); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ELSE(shader, &label); + + ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler); + + if (scale != 1.0f) + ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), + ureg_src(tmp), ureg_imm1f(shader, scale), + ureg_scalar(tc, TGSI_SWIZZLE_Z)); + else + ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), + ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z)); + + ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + + ureg_release_temporary(shader, tmp); + + return ureg_create_shader_and_destroy(shader, r->pipe); +} + static bool init_pipe_state(struct vl_mc *r) { diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 7e72fbaee3f..182294894c1 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -108,7 +108,9 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer) } static void -upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsigned y, short *block) +upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, + unsigned x, unsigned y, short *block, + bool intra, enum pipe_mpeg12_dct_type type) { unsigned tex_pitch; short *texels; @@ -118,6 +120,8 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, 
unsigned x, unsign assert(buffer); assert(block); + vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type); + tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short); texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; @@ -142,7 +146,8 @@ upload_buffer(struct vl_mpeg12_decoder *ctx, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) { if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) { - upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks); + upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks, + mb->dct_intra, mb->dct_type); blocks += BLOCK_WIDTH * BLOCK_HEIGHT; } } @@ -153,7 +158,8 @@ upload_buffer(struct vl_mpeg12_decoder *ctx, for (tb = 1; tb < 3; ++tb) { if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) { - upload_block(buffer, tb, mb->mbx, mb->mby, blocks); + upload_block(buffer, tb, mb->mbx, mb->mby, blocks, + mb->dct_intra, mb->dct_type); blocks += BLOCK_WIDTH * BLOCK_HEIGHT; } } @@ -245,7 +251,7 @@ vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer, assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); for ( i = 0; i < num_macroblocks; ++i ) { - vl_vb_add_block(&buf->vertex_stream, &mb[i], dec->empty_block_mask); + vl_vb_add_block(&buf->vertex_stream, &mb[i]); upload_buffer(dec, buf, &mb[i]); } } @@ -565,7 +571,7 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec, static bool init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height) { - unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y; + unsigned chroma_width, chroma_height; struct pipe_sampler_view *matrix, *transpose; float matrix_scale, transpose_scale; @@ -619,28 +625,21 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_ pipe_sampler_view_reference(&transpose, matrix); if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height, - 2, 2, dec->nr_of_idct_render_targets, matrix, transpose)) + 
dec->nr_of_idct_render_targets, matrix, transpose)) goto error_y; if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { chroma_width = buffer_width / 2; chroma_height = buffer_height / 2; - chroma_blocks_x = 1; - chroma_blocks_y = 1; } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { chroma_width = buffer_width; chroma_height = buffer_height / 2; - chroma_blocks_x = 2; - chroma_blocks_y = 1; } else { chroma_width = buffer_width; chroma_height = buffer_height; - chroma_blocks_x = 2; - chroma_blocks_y = 2; } if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height, - chroma_blocks_x, chroma_blocks_y, dec->nr_of_idct_render_targets, matrix, transpose)) goto error_c; @@ -696,7 +695,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context, dec->pipe = pipe; - dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2); + dec->quads = vl_vb_upload_quads(dec->pipe); dec->pos = vl_vb_upload_pos( dec->pipe, dec->base.width / MACROBLOCK_WIDTH, diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index 89815c49e68..212ace7512a 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -32,11 +32,10 @@ struct vl_ycbcr_vertex_stream { - struct vertex2s pos; - uint8_t mb_type_intra; - uint8_t dct_type_field; - uint8_t dummy[2]; - uint8_t eb[2][2]; + uint8_t x; + uint8_t y; + uint8_t intra; + uint8_t field; }; struct vl_mv_vertex_stream @@ -50,25 +49,25 @@ static const struct vertex2f block_quad[4] = { }; struct pipe_vertex_buffer -vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y) +vl_vb_upload_quads(struct pipe_context *pipe) { struct pipe_vertex_buffer quad; struct pipe_transfer *buf_transfer; - struct vertex4f *v; + struct vertex2f *v; - unsigned x, y, i; + unsigned i; assert(pipe); /* create buffer */ - quad.stride = sizeof(struct vertex4f); + quad.stride = sizeof(struct vertex2f); quad.buffer_offset = 
0; quad.buffer = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STATIC, - sizeof(struct vertex4f) * 4 * blocks_x * blocks_y + sizeof(struct vertex2f) * 4 ); if(!quad.buffer) @@ -83,16 +82,9 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks &buf_transfer ); - for ( y = 0; y < blocks_y; ++y) { - for ( x = 0; x < blocks_x; ++x) { - for (i = 0; i < 4; ++i, ++v) { - v->x = block_quad[i].x; - v->y = block_quad[i].y; - - v->z = x; - v->w = y; - } - } + for (i = 0; i < 4; ++i, ++v) { + v->x = block_quad[i].x; + v->y = block_quad[i].y; } pipe_buffer_unmap(pipe, buf_transfer); @@ -155,14 +147,14 @@ vl_vb_get_quad_vertex_element(void) element.src_offset = 0; element.instance_divisor = 0; element.vertex_buffer_index = 0; - element.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + element.src_format = PIPE_FORMAT_R32G32_FLOAT; return element; } static void vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements, - unsigned vertex_buffer_index) + unsigned vertex_buffer_index) { unsigned i, offset = 0; @@ -187,17 +179,11 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe) vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); /* Position element */ - vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; + vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - /* flags */ - vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - - /* empty block element of selected component */ - vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1); + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); - return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems); + return pipe->create_vertex_elements_state(pipe, 2, vertex_elems); } void * @@ -227,7 +213,8 @@ vl_vb_get_ves_mv(struct pipe_context *pipe) } void -vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context 
*pipe, unsigned width, unsigned height) +vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, + unsigned width, unsigned height) { unsigned i, size; @@ -245,7 +232,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - sizeof(struct vl_ycbcr_vertex_stream) * size + sizeof(struct vl_ycbcr_vertex_stream) * size * 4 ); } @@ -319,6 +306,24 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) } +void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, + unsigned component, unsigned x, unsigned y, + bool intra, enum pipe_mpeg12_dct_type type) +{ + struct vl_ycbcr_vertex_stream *stream; + + assert(buffer); + assert(buffer->ycbcr[component].num_instances < buffer->width * buffer->height * 4); + + stream = buffer->ycbcr[component].vertex_stream++; + stream->x = x; + stream->y = y; + stream->intra = intra; + stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD; + + buffer->ycbcr[component].num_instances++; +} + static void get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2]) { @@ -341,47 +346,14 @@ get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvecto dst[1].w = src->bottom.wheight; } -static bool -get_ycbcr_vectors(struct vl_ycbcr_vertex_stream *stream, - struct pipe_mpeg12_macroblock *mb, const unsigned (*empty_block_mask)[2][2]) -{ - bool completely_empty = true; - unsigned i, j; - - stream->pos.x = mb->mbx; - stream->pos.y = mb->mby; - stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; - stream->mb_type_intra = mb->dct_intra; - - for ( i = 0; i < 2; ++i) - for ( j = 0; j < 2; ++j) { - bool empty = !(mb->cbp & (*empty_block_mask)[i][j]); - stream->eb[i][j] = empty; - completely_empty &= empty; - } - - return !completely_empty; -} - void -vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb, - const unsigned 
(*empty_block_mask)[3][2][2]) +vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb) { - unsigned i, mv_pos; + unsigned mv_pos; assert(buffer); assert(mb); - if(mb->cbp) { - for (i = 0; i < VL_MAX_PLANES; ++i) { - assert(buffer->ycbcr[i].num_instances < buffer->width * buffer->height); - if (get_ycbcr_vectors(buffer->ycbcr[i].vertex_stream, mb, &(*empty_block_mask)[i])) { - buffer->ycbcr[i].vertex_stream++; - buffer->ycbcr[i].num_instances++; - } - } - } - mv_pos = mb->mbx + mb->mby * buffer->width; get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv); get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv); @@ -422,7 +394,7 @@ vl_vb_cleanup(struct vl_vertex_buffer *buffer) assert(buffer); - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) { + for (i = 0; i < VL_MAX_PLANES; ++i) { pipe_resource_reference(&buffer->ycbcr[i].resource, NULL); } diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 3d9c6141c5c..5632eb297bd 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -44,11 +44,8 @@ enum VS_INPUT VS_I_RECT, VS_I_VPOS, - VS_I_FLAGS, - VS_I_EB, - - VS_I_MV_TOP = VS_I_FLAGS, - VS_I_MV_BOTTOM = VS_I_EB, + VS_I_MV_TOP, + VS_I_MV_BOTTOM, NUM_VS_INPUTS }; @@ -71,8 +68,7 @@ struct vl_vertex_buffer } mv[VL_MAX_REF_FRAMES]; }; -struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, - unsigned blocks_x, unsigned blocks_y); +struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); @@ -90,8 +86,11 @@ struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int moti void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); -void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb, - 
const unsigned (*empty_block_mask)[3][2][2]); +void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer, + unsigned component, unsigned x, unsigned y, + bool intra, enum pipe_mpeg12_dct_type type); + +void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb); void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); -- 2.30.2