From: Christian König Date: Tue, 8 Mar 2011 17:34:05 +0000 (+0100) Subject: [g3dvl] use a single vertex buffer for both idct and mc X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=310eea52ca1e997295c84163066cc5d0fd4f8cf6;p=mesa.git [g3dvl] use a single vertex buffer for both idct and mc --- diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index e1e57431a0b..b418aea9514 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -44,14 +44,6 @@ #define NR_RENDER_TARGETS 4 -enum VS_INPUT -{ - VS_I_RECT, - VS_I_VPOS, - - NUM_VS_INPUTS -}; - enum VS_OUTPUT { VS_O_VPOS, @@ -99,13 +91,14 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2], } static void * -create_vert_shader(struct vl_idct *idct, bool matrix_stage) +create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle) { struct ureg_program *shader; - struct ureg_src scale; - struct ureg_src vrect, vpos; + struct ureg_src vrect, vpos, vblock, eb[4]; + struct ureg_src scale, blocks_xy, t_eb; struct ureg_dst t_tex, t_start; struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; + unsigned label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) @@ -116,9 +109,15 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); + eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0); + eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0); + eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1); + eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1); + o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); @@ -127,38 +126,74 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage) /* * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height) + * blocks_xy = (blocks_x, blocks_y) * - * t_vpos = vpos + vrect - * o_vpos.xy = t_vpos * scale - * o_vpos.zw = vpos + * ar = vblock.y * blocks.x + vblock.x + * if eb[ar].(color_swizzle) + * o_vpos.xy = -1 + * else + * t_tex = vpos * blocks_xy + vblock + * t_start = t_tex * scale + * t_tex = t_tex + vrect + * o_vpos.xy = t_tex * scale * - * o_l_addr = calc_addr(...) - * o_r_addr = calc_addr(...) + * o_l_addr = calc_addr(...) + * o_r_addr = calc_addr(...) + * endif + * o_vpos.zw = vpos * */ + scale = ureg_imm2f(shader, (float)BLOCK_WIDTH / idct->buffer_width, (float)BLOCK_HEIGHT / idct->buffer_height); - ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); - ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), - ureg_scalar(vrect, TGSI_SWIZZLE_X), - ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS)); + blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y); - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); - ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); + if (idct->blocks_x > 1 || idct->blocks_y > 1) { + struct ureg_dst ar = ureg_DECL_address(shader); - ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); + ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X), + ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock); - if(matrix_stage) { - calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); - calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); + ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex)); + t_eb = ureg_src_indirect(eb[0], ureg_src(ar)); } else { - calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); - calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); + t_eb = eb[0]; } + ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label); + + ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f)); + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ELSE(shader, &label); + + ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock); + ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); + + ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect); + + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); + ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z), + ureg_scalar(vrect, TGSI_SWIZZLE_X), + ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS)); + + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); + + if(matrix_stage) { + calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); + calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4); + } else { + calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4); + calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4); + } + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + + ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); + ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_start); @@ -326,12 +361,12 @@ create_transpose_frag_shader(struct vl_idct *idct) } static bool -init_shaders(struct vl_idct *idct) +init_shaders(struct vl_idct *idct, int color_swizzle) { - idct->matrix_vs = create_vert_shader(idct, true); + idct->matrix_vs = create_vert_shader(idct, true, color_swizzle); idct->matrix_fs = create_matrix_frag_shader(idct); - idct->transpose_vs = create_vert_shader(idct, false); + idct->transpose_vs = create_vert_shader(idct, false, color_swizzle); idct->transpose_fs = create_transpose_frag_shader(idct); return @@ -353,14 +388,13 @@ cleanup_shaders(struct vl_idct *idct) static bool init_state(struct vl_idct *idct) { - struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; struct pipe_sampler_state sampler; struct pipe_rasterizer_state rs_state; unsigned i; assert(idct); - idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks); + idct->quad = vl_vb_upload_quads(idct->pipe, idct->blocks_x, idct->blocks_y); if(idct->quad.buffer == NULL) return false; @@ -393,13 +427,7 @@ init_state(struct vl_idct *idct) rs_state.gl_rasterization_rules = false; idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); - vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); - - /* Pos element */ - vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; - - idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); - idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems); + idct->vertex_elems_state = vl_vb_get_elems_state(idct->pipe, false); return true; } @@ -473,7 +501,7 @@ cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer) } static bool -init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) +init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_vertex_buffer stream) { assert(idct && buffer); @@ -481,12 +509,9 @@ init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset; pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer); - buffer->vertex_bufs.individual.pos = vl_vb_init( - &buffer->blocks, idct->pipe, idct->max_blocks, - idct->vertex_buffer_stride); - - if(buffer->vertex_bufs.individual.pos.buffer == NULL) - return false; + buffer->vertex_bufs.individual.stream.stride = stream.stride; + buffer->vertex_bufs.individual.stream.buffer_offset = stream.buffer_offset; + pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, stream.buffer); return true; } @@ -497,9 +522,7 @@ cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) assert(idct && buffer); pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL); - pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL); - - vl_vb_cleanup(&buffer->blocks); + pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, NULL); } struct pipe_resource * @@ -555,20 +578,19 @@ vl_idct_upload_matrix(struct pipe_context *pipe) bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, - struct pipe_resource *matrix) + unsigned blocks_x, unsigned blocks_y, + int color_swizzle, struct pipe_resource *matrix) { assert(idct && pipe && matrix); idct->pipe = pipe; idct->buffer_width = buffer_width; idct->buffer_height = buffer_height; + idct->blocks_x = blocks_x; + idct->blocks_y = blocks_y; pipe_resource_reference(&idct->matrix, matrix); - idct->max_blocks = - align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH * - align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT; - - if(!init_shaders(idct)) + if(!init_shaders(idct, color_swizzle)) return false; if(!init_state(idct)) { @@ -589,7 +611,8 @@ vl_idct_cleanup(struct vl_idct *idct) } bool -vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst) +vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_resource *dst, struct pipe_vertex_buffer stream) { struct pipe_surface template; @@ -606,7 +629,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct if (!init_textures(idct, buffer)) return false; - if (!init_vertex_buffers(idct, buffer)) + if (!init_vertex_buffers(idct, buffer, stream)) return false; /* init state */ @@ -694,14 +717,12 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) ); buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer); - - vl_vb_map(&buffer->blocks, idct->pipe); } void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block) { - struct vertex2s v; + //struct vertex2s v; unsigned tex_pitch; short *texels; @@ -714,10 +735,6 @@ vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short * for (i = 0; i < BLOCK_HEIGHT; ++i) memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short)); - - v.x = x; - v.y = y; - vl_vb_add_block(&buffer->blocks, &v); } void @@ -727,19 +744,18 @@ vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer) idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer); idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer); - vl_vb_unmap(&buffer->blocks, idct->pipe); } void -vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer) +vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) { unsigned num_verts; assert(idct); + assert(buffer); - num_verts = vl_vb_restart(&buffer->blocks); - - if(num_verts > 0) { + if(num_instances > 0) { + num_verts = idct->blocks_x * idct->blocks_y * 4; idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all); @@ -752,7 +768,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer) idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]); idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs); idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); /* second stage */ idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]); @@ -761,6 +777,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer) idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]); idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs); idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts); + util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances); } } diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h index fcba75a7607..78e4a46369d 100644 --- a/src/gallium/auxiliary/vl/vl_idct.h +++ b/src/gallium/auxiliary/vl/vl_idct.h @@ -37,8 +37,7 @@ struct vl_idct unsigned buffer_width; unsigned buffer_height; - - unsigned max_blocks; + unsigned blocks_x, blocks_y; void *rs_state; void *vertex_elems_state; @@ -58,8 +57,6 @@ struct vl_idct struct pipe_resource *matrix; struct pipe_vertex_buffer quad; - - unsigned vertex_buffer_stride; }; struct vl_idct_buffer @@ -92,24 +89,24 @@ struct vl_idct_buffer union { struct pipe_vertex_buffer all[2]; - struct { struct pipe_vertex_buffer quad, pos; } individual; + struct { struct pipe_vertex_buffer quad, stream; } individual; } vertex_bufs; - struct vl_vertex_buffer blocks; - struct pipe_transfer *tex_transfer; short *texels; }; struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe); -bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, - unsigned buffer_width, unsigned buffer_height, - struct pipe_resource *matrix); +bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, + unsigned buffer_width, unsigned buffer_height, + unsigned blocks_x, unsigned blocks_y, + int color_swizzle, struct pipe_resource *matrix); void vl_idct_cleanup(struct vl_idct *idct); -bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst); +bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, + struct pipe_resource *dst, struct pipe_vertex_buffer stream); void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer); @@ -119,6 +116,6 @@ void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, sh void vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer); -void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer); +void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts); #endif diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 2f68ec17042..d1b2144aea5 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -38,40 +38,11 @@ #include #include -#define DEFAULT_BUF_ALIGNMENT 1 #define MACROBLOCK_WIDTH 16 #define MACROBLOCK_HEIGHT 16 #define BLOCK_WIDTH 8 #define BLOCK_HEIGHT 8 -struct vertex_stream -{ - struct vertex2s pos; - struct vertex2s mv[4]; - struct { - int8_t y; - int8_t cr; - int8_t cb; - int8_t flag; - } eb[2][2]; -}; - -enum VS_INPUT -{ - VS_I_RECT, - VS_I_VPOS, - VS_I_MV0, - VS_I_MV1, - VS_I_MV2, - VS_I_MV3, - VS_I_EB_0_0, - VS_I_EB_0_1, - VS_I_EB_1_0, - VS_I_EB_1_1, - - NUM_VS_INPUTS -}; - enum VS_OUTPUT { VS_O_VPOS, @@ -514,14 +485,13 @@ static bool init_buffers(struct vl_mpeg12_mc_renderer *r) { struct pipe_resource *idct_matrix; - struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; const unsigned mbw = align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; const unsigned mbh = align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; - unsigned i, chroma_width, chroma_height; + unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y; assert(r); @@ -531,51 +501,37 @@ init_buffers(struct vl_mpeg12_mc_renderer *r) if (!(idct_matrix = vl_idct_upload_matrix(r->pipe))) return false; - if (!vl_idct_init(&r->idct_luma, r->pipe, r->buffer_width, r->buffer_height, idct_matrix)) + if (!vl_idct_init(&r->idct_y, r->pipe, r->buffer_width, r->buffer_height, + 2, 2, TGSI_SWIZZLE_X, idct_matrix)) return false; if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { chroma_width = r->buffer_width / 2; chroma_height = r->buffer_height / 2; + chroma_blocks_x = 1; + chroma_blocks_y = 1; } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { chroma_width = r->buffer_width; chroma_height = r->buffer_height / 2; + chroma_blocks_x = 2; + chroma_blocks_y = 1; } else { chroma_width = r->buffer_width; chroma_height = r->buffer_height; + chroma_blocks_x = 2; + chroma_blocks_y = 2; } - if(!vl_idct_init(&r->idct_chroma, r->pipe, chroma_width, chroma_height, idct_matrix)) + if(!vl_idct_init(&r->idct_cr, r->pipe, chroma_width, chroma_height, + chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix)) return false; - memset(&vertex_elems, 0, sizeof(vertex_elems)); - - vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); - r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch); - - /* Position element */ - vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; - - for (i = 0; i < 4; ++i) - /* motion vector 0..4 element */ - vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED; - - /* y, cr, cb empty block element top left block */ - vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - - /* y, cr, cb empty block element top right block */ - vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - - /* y, cr, cb empty block element bottom left block */ - vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - - /* y, cr, cb empty block element bottom right block */ - vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - - r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1); + if(!vl_idct_init(&r->idct_cb, r->pipe, chroma_width, chroma_height, + chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix)) + return false; - r->vertex_elems_state = r->pipe->create_vertex_elements_state( - r->pipe, NUM_VS_INPUTS, vertex_elems); + r->quad = vl_vb_upload_quads(r->pipe, 1, 1); + r->vertex_elems_state = vl_vb_get_elems_state(r->pipe, true); if (r->vertex_elems_state == NULL) return false; @@ -597,8 +553,9 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r) r->pipe->delete_vs_state(r->pipe, r->vs); r->pipe->delete_fs_state(r->pipe, r->fs); - vl_idct_cleanup(&r->idct_luma); - vl_idct_cleanup(&r->idct_chroma); + vl_idct_cleanup(&r->idct_y); + vl_idct_cleanup(&r->idct_cr); + vl_idct_cleanup(&r->idct_cb); r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state); } @@ -630,118 +587,6 @@ static struct pipe_sampler_view return sampler_view; } -static void -get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4]) -{ - switch (mb->mb_type) { - case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - { - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - mv[2].x = mb->pmv[0][1][0]; - mv[2].y = mb->pmv[0][1][1]; - - } else { - mv[2].x = mb->pmv[0][1][0]; - mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4); - - mv[3].x = mb->pmv[1][1][0]; - mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4); - - if(mb->mvfs[0][1]) mv[2].y += 2; - if(!mb->mvfs[1][1]) mv[3].y -= 2; - } - - /* fall-through */ - } - case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - { - if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) { - - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - mv[0].x = mb->pmv[0][1][0]; - mv[0].y = mb->pmv[0][1][1]; - - } else { - mv[0].x = mb->pmv[0][1][0]; - mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4); - - mv[1].x = mb->pmv[1][1][0]; - mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4); - - if(mb->mvfs[0][1]) mv[0].y += 2; - if(!mb->mvfs[1][1]) mv[1].y -= 2; - } - - } else { - - if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - mv[0].x = mb->pmv[0][0][0]; - mv[0].y = mb->pmv[0][0][1]; - - } else { - mv[0].x = mb->pmv[0][0][0]; - mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4); - - mv[1].x = mb->pmv[1][0][0]; - mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4); - - if(mb->mvfs[0][0]) mv[0].y += 2; - if(!mb->mvfs[1][0]) mv[1].y -= 2; - } - } - } - default: - break; - } -} - -static void -grab_vectors(struct vl_mpeg12_mc_renderer *r, - struct vl_mpeg12_mc_buffer *buffer, - struct pipe_mpeg12_macroblock *mb) -{ - struct vertex_stream stream; - - unsigned i, j; - - assert(r); - assert(mb); - - stream.pos.x = mb->mbx; - stream.pos.y = mb->mby; - for ( i = 0; i < 2; ++i) { - for ( j = 0; j < 2; ++j) { - stream.eb[i][j].y = !(mb->cbp & (*r->empty_block_mask)[0][i][j]); - stream.eb[i][j].cr = !(mb->cbp & (*r->empty_block_mask)[1][i][j]); - stream.eb[i][j].cb = !(mb->cbp & (*r->empty_block_mask)[2][i][j]); - } - } - stream.eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; - stream.eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME; - stream.eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD; - switch (mb->mb_type) { - case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: - stream.eb[1][1].flag = -1; - break; - - case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - stream.eb[1][1].flag = 1; - break; - - case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - stream.eb[1][1].flag = 0; - break; - - default: - assert(0); - } - - get_motion_vectors(mb, stream.mv); - vl_vb_add_block(&buffer->vertex_stream, &stream); -} - static void grab_blocks(struct vl_mpeg12_mc_renderer *r, struct vl_mpeg12_mc_buffer *buffer, @@ -785,12 +630,9 @@ grab_macroblock(struct vl_mpeg12_mc_renderer *r, assert(r); assert(mb); assert(mb->blocks); - assert(buffer->num_macroblocks < r->macroblocks_per_batch); - grab_vectors(r, buffer, mb); + vl_vb_add_block(&buffer->vertex_stream, mb, r->empty_block_mask); grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks); - - ++buffer->num_macroblocks; } static void @@ -878,7 +720,13 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 buffer->surface = NULL; buffer->past = NULL; buffer->future = NULL; - buffer->num_macroblocks = 0; + + buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride; + buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset; + pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer); + + buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, renderer->pipe, + renderer->macroblocks_per_batch); memset(&template, 0, sizeof(struct pipe_resource)); template.target = PIPE_TEXTURE_2D; @@ -895,7 +743,9 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template); - if (!vl_idct_init_buffer(&renderer->idct_luma, &buffer->idct_y, buffer->textures.individual.y)) + if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y, + buffer->textures.individual.y, + buffer->vertex_bufs.individual.stream)) return false; if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { @@ -910,10 +760,14 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 buffer->textures.individual.cr = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template); - if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cb, buffer->textures.individual.cb)) + if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb, + buffer->textures.individual.cb, + buffer->vertex_bufs.individual.stream)) return false; - if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cr, buffer->textures.individual.cr)) + if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr, + buffer->textures.individual.cr, + buffer->vertex_bufs.individual.stream)) return false; for (i = 0; i < 3; ++i) { @@ -928,14 +782,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1 renderer->pipe, buffer->textures.all[i], &sampler_view); } - buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride; - buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset; - pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer); - - buffer->vertex_bufs.individual.stream = vl_vb_init( - &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch, - renderer->vertex_stream_stride); - return true; } @@ -955,9 +801,9 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL); vl_vb_cleanup(&buffer->vertex_stream); - vl_idct_cleanup_buffer(&renderer->idct_luma, &buffer->idct_y); - vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cb); - vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cr); + vl_idct_cleanup_buffer(&renderer->idct_y, &buffer->idct_y); + vl_idct_cleanup_buffer(&renderer->idct_cb, &buffer->idct_cb); + vl_idct_cleanup_buffer(&renderer->idct_cr, &buffer->idct_cr); pipe_surface_reference(&buffer->surface, NULL); pipe_surface_reference(&buffer->past, NULL); @@ -969,9 +815,9 @@ vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12 { assert(renderer && buffer); - vl_idct_map_buffers(&renderer->idct_luma, &buffer->idct_y); - vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cr); - vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cb); + vl_idct_map_buffers(&renderer->idct_y, &buffer->idct_y); + vl_idct_map_buffers(&renderer->idct_cr, &buffer->idct_cr); + vl_idct_map_buffers(&renderer->idct_cb, &buffer->idct_cb); vl_vb_map(&buffer->vertex_stream, renderer->pipe); } @@ -986,6 +832,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, struct pipe_mpeg12_macroblock *mpeg12_macroblocks, struct pipe_fence_handle **fence) { + unsigned i; + assert(renderer && buffer); assert(surface); assert(num_macroblocks); @@ -1002,26 +850,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, assert(buffer->future == future); } - while (num_macroblocks) { - unsigned left_in_batch = renderer->macroblocks_per_batch - buffer->num_macroblocks; - unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); - unsigned i; - - for (i = 0; i < num_to_submit; ++i) { - assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); - grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]); - } - - num_macroblocks -= num_to_submit; - - if (buffer->num_macroblocks == renderer->macroblocks_per_batch) { - vl_mpeg12_mc_unmap_buffer(renderer, buffer); - vl_mpeg12_mc_renderer_flush(renderer, buffer); - pipe_surface_reference(&buffer->surface, surface); - pipe_surface_reference(&buffer->past, past); - pipe_surface_reference(&buffer->future, future); - vl_mpeg12_mc_map_buffer(renderer, buffer); - } + for (i = 0; i < num_macroblocks; ++i) { + assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); + grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]); } } @@ -1030,9 +861,9 @@ vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg { assert(renderer && buffer); - vl_idct_unmap_buffers(&renderer->idct_luma, &buffer->idct_y); - vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cr); - vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cb); + vl_idct_unmap_buffers(&renderer->idct_y, &buffer->idct_y); + vl_idct_unmap_buffers(&renderer->idct_cr, &buffer->idct_cr); + vl_idct_unmap_buffers(&renderer->idct_cb, &buffer->idct_cb); vl_vb_unmap(&buffer->vertex_stream, renderer->pipe); } @@ -1040,17 +871,18 @@ vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer) { + unsigned num_not_empty, num_empty; assert(renderer && buffer); - assert(buffer->num_macroblocks <= renderer->macroblocks_per_batch); - if (buffer->num_macroblocks == 0) + num_not_empty = buffer->vertex_stream.num_not_empty; + num_empty = buffer->vertex_stream.num_empty; + + if (num_not_empty == 0 && num_empty == 0) return; - vl_idct_flush(&renderer->idct_luma, &buffer->idct_y); - vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cr); - vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cb); - - vl_vb_restart(&buffer->vertex_stream); + vl_idct_flush(&renderer->idct_y, &buffer->idct_y, num_not_empty); + vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, num_not_empty); + vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, num_not_empty); renderer->fb_state.cbufs[0] = buffer->surface; renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); @@ -1060,18 +892,14 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state); if (buffer->past) { - buffer->textures.individual.ref[0] = buffer->past->texture; buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past); } else { - buffer->textures.individual.ref[0] = buffer->surface->texture; buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface); } if (buffer->future) { - buffer->textures.individual.ref[1] = buffer->future->texture; buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future); } else { - buffer->textures.individual.ref[1] = buffer->surface->texture; buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface); } @@ -1080,7 +908,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs); renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs); - util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, buffer->num_macroblocks); + + if (num_not_empty > 0) + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_not_empty); + + if (num_empty > 0) + util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, + buffer->vertex_stream.size - num_empty, num_empty); renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence); @@ -1089,5 +923,5 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp pipe_surface_reference(&buffer->past, NULL); pipe_surface_reference(&buffer->future, NULL); - buffer->num_macroblocks = 0; + vl_vb_restart(&buffer->vertex_stream); } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h index 76d6e25ca36..c319064c70f 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h @@ -56,12 +56,10 @@ struct vl_mpeg12_mc_renderer enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode; unsigned macroblocks_per_batch; - unsigned vertex_stream_stride; - struct pipe_viewport_state viewport; struct pipe_framebuffer_state fb_state; - struct vl_idct idct_luma, idct_chroma; + struct vl_idct idct_y, idct_cr, idct_cb; void *vertex_elems_state; void *rs_state; @@ -93,8 +91,8 @@ struct vl_mpeg12_mc_buffer union { - struct pipe_resource *all[5]; - struct { struct pipe_resource *y, *cb, *cr, *ref[2]; } individual; + struct pipe_resource *all[3]; + struct { struct pipe_resource *y, *cb, *cr; } individual; } textures; union @@ -107,7 +105,6 @@ struct vl_mpeg12_mc_buffer struct pipe_surface *surface, *past, *future; struct pipe_fence_handle **fence; - unsigned num_macroblocks; }; bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h index 9c745d73978..a927e829349 100644 --- a/src/gallium/auxiliary/vl/vl_types.h +++ b/src/gallium/auxiliary/vl/vl_types.h @@ -43,9 +43,4 @@ struct vertex4f float x, y, z, w; }; -struct quadf -{ - struct vertex2f bl, tl, tr, br; -}; - #endif /* vl_types_h */ diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index 610f37e1f89..2d602b96d3f 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -34,32 +34,43 @@ #include "vl_vertex_buffers.h" #include "vl_types.h" +struct vl_vertex_stream +{ + struct vertex2s pos; + struct { + int8_t y; + int8_t cr; + int8_t cb; + int8_t flag; + } eb[2][2]; + struct vertex2s mv[4]; +}; + /* vertices for a quad covering a block */ -static const struct quadf const_quad = { - {0.0f, 1.0f}, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f} +static const struct vertex2f block_quad[4] = { + {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} }; struct pipe_vertex_buffer -vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks) +vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y) { struct pipe_vertex_buffer quad; struct pipe_transfer *buf_transfer; - struct quadf *v; + struct vertex4f *v; - unsigned i; + unsigned x, y, i; assert(pipe); - assert(max_blocks); /* create buffer */ - quad.stride = sizeof(struct vertex2f); + quad.stride = sizeof(struct vertex4f); quad.buffer_offset = 0; quad.buffer = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STATIC, - sizeof(struct vertex2f) * 4 * max_blocks + sizeof(struct vertex4f) * 4 * blocks_x * blocks_y ); if(!quad.buffer) @@ -74,15 +85,24 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks) &buf_transfer ); - for ( i = 0; i < max_blocks; ++i) - memcpy(v + i, &const_quad, sizeof(const_quad)); + for ( y = 0; y < blocks_y; ++y) { + for ( x = 0; x < blocks_x; ++x) { + for (i = 0; i < 4; ++i, ++v) { + v->x = block_quad[i].x; + v->y = block_quad[i].y; + + v->z = x; + v->w = y; + } + } + } pipe_buffer_unmap(pipe, buf_transfer); return quad; } -struct pipe_vertex_element +static struct pipe_vertex_element vl_vb_get_quad_vertex_element(void) { struct pipe_vertex_element element; @@ -91,12 +111,12 @@ vl_vb_get_quad_vertex_element(void) element.src_offset = 0; element.instance_divisor = 0; element.vertex_buffer_index = 0; - element.src_format = PIPE_FORMAT_R32G32_FLOAT; + element.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; return element; } -unsigned +static void vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements, unsigned vertex_buffer_index) { @@ -110,29 +130,61 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements elements[i].vertex_buffer_index = vertex_buffer_index; offset += util_format_get_blocksize(elements[i].src_format); } +} + +void * +vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs) +{ + struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; + + unsigned i; + + memset(&vertex_elems, 0, sizeof(vertex_elems)); + vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + + /* Position element */ + vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; + + /* y, cr, cb empty block element top left block */ + vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + + /* y, cr, cb empty block element top right block */ + vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + + /* y, cr, cb empty block element bottom left block */ + vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; + + /* y, cr, cb empty block element bottom right block */ + vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED; - return offset; + for (i = 0; i < 4; ++i) + /* motion vector 0..4 element */ + vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED; + + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - (include_mvs ? 1 : 5), 1); + + return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS - (include_mvs ? 0 : 4), vertex_elems); } struct pipe_vertex_buffer -vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, - unsigned max_blocks, unsigned stride) +vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned size) { struct pipe_vertex_buffer buf; assert(buffer); - buffer->num_verts = 0; - buffer->stride = stride; + buffer->size = size; + buffer->num_not_empty = 0; + buffer->num_empty = 0; - buf.stride = stride; + buf.stride = sizeof(struct vl_vertex_stream); buf.buffer_offset = 0; buf.buffer = pipe_buffer_create ( pipe->screen, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, - stride * 4 * max_blocks + sizeof(struct vl_vertex_stream) * size ); pipe_resource_reference(&buffer->resource, buf.buffer); @@ -147,13 +199,129 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) { assert(buffer && pipe); - buffer->vectors = pipe_buffer_map + buffer->start = pipe_buffer_map ( pipe, buffer->resource, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, &buffer->transfer ); + buffer->end = buffer->start + buffer->resource->width0 / sizeof(struct vl_vertex_stream); +} + +static void +get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4]) +{ + switch (mb->mb_type) { + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + { + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + mv[2].x = mb->pmv[0][1][0]; + mv[2].y = mb->pmv[0][1][1]; + + } else { + mv[2].x = mb->pmv[0][1][0]; + mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4); + + mv[3].x = mb->pmv[1][1][0]; + mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4); + + if(mb->mvfs[0][1]) mv[2].y += 2; + if(!mb->mvfs[1][1]) mv[3].y -= 2; + } + + /* fall-through */ + } + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + { + if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) { + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + mv[0].x = mb->pmv[0][1][0]; + mv[0].y = mb->pmv[0][1][1]; + + } else { + mv[0].x = mb->pmv[0][1][0]; + mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4); + + mv[1].x = mb->pmv[1][1][0]; + mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4); + + if(mb->mvfs[0][1]) mv[0].y += 2; + if(!mb->mvfs[1][1]) mv[1].y -= 2; + } + + } else { + + if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { + mv[0].x = mb->pmv[0][0][0]; + mv[0].y = mb->pmv[0][0][1]; + + } else { + mv[0].x = mb->pmv[0][0][0]; + mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4); + + mv[1].x = mb->pmv[1][0][0]; + mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4); + + if(mb->mvfs[0][0]) mv[0].y += 2; + if(!mb->mvfs[1][0]) mv[1].y -= 2; + } + } + } + default: + break; + } +} + +void +vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb, + const unsigned (*empty_block_mask)[3][2][2]) +{ + struct vl_vertex_stream *stream; + unsigned i, j; + + assert(buffer); + assert(mb); + + if(mb->cbp) + stream = buffer->start + buffer->num_not_empty++; + else + stream = buffer->end - ++buffer->num_empty; + + stream->pos.x = mb->mbx; + stream->pos.y = mb->mby; + + for ( i = 0; i < 2; ++i) { + for ( j = 0; j < 2; ++j) { + stream->eb[i][j].y = !(mb->cbp & (*empty_block_mask)[0][i][j]); + stream->eb[i][j].cr = !(mb->cbp & (*empty_block_mask)[1][i][j]); + stream->eb[i][j].cb = !(mb->cbp & (*empty_block_mask)[2][i][j]); + } + } + stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD; + stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME; + stream->eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD; + switch (mb->mb_type) { + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + stream->eb[1][1].flag = -1; + break; + + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + stream->eb[1][1].flag = 1; + break; + + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + stream->eb[1][1].flag = 0; + break; + + default: + assert(0); + } + + get_motion_vectors(mb, stream->mv); } void @@ -164,14 +332,13 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe) pipe_buffer_unmap(pipe, buffer->transfer); } -unsigned +void vl_vb_restart(struct vl_vertex_buffer *buffer) { assert(buffer); - unsigned todo = buffer->num_verts; - buffer->num_verts = 0; - return todo; + buffer->num_not_empty = 0; + buffer->num_empty = 0; } void diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index ca06abe2027..4400bda6274 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -29,45 +29,53 @@ #include #include +#include #include "vl_types.h" +enum VS_INPUT +{ + VS_I_RECT, + VS_I_VPOS, + VS_I_EB_0_0, + VS_I_EB_0_1, + VS_I_EB_1_0, + VS_I_EB_1_1, + VS_I_MV0, + VS_I_MV1, + VS_I_MV2, + VS_I_MV3, + + NUM_VS_INPUTS +}; + struct vl_vertex_buffer { - unsigned num_verts; - unsigned stride; + unsigned size; + unsigned num_not_empty; + unsigned num_empty; struct pipe_resource *resource; struct pipe_transfer *transfer; - void *vectors; + struct vl_vertex_stream *start; + struct vl_vertex_stream *end; }; -struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks); +struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, + unsigned blocks_x, unsigned blocks_y); -struct pipe_vertex_element vl_vb_get_quad_vertex_element(void); - -unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements, - unsigned vertex_buffer_index); +void *vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs); struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, - unsigned max_blocks, unsigned stride); + unsigned max_blocks); void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); -static inline void -vl_vb_add_block(struct vl_vertex_buffer *buffer, void *elements) -{ - void *pos; - - assert(buffer); - - pos = buffer->vectors + buffer->num_verts * buffer->stride; - memcpy(pos, elements, buffer->stride); - buffer->num_verts++; -} +void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb, + const unsigned (*empty_block_mask)[3][2][2]); void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe); -unsigned vl_vb_restart(struct vl_vertex_buffer *buffer); +void vl_vb_restart(struct vl_vertex_buffer *buffer); void vl_vb_cleanup(struct vl_vertex_buffer *buffer);