X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fvl%2Fvl_mpeg12_mc_renderer.c;h=b195d7e2c397ab0964a080c654dff3a52308bf9e;hb=f853ea007816cdad4395b42388e12cd65bb8eb43;hp=70bb756413137077f949c4c7a4ae30ea011fd7a4;hpb=27016941bca40a6563dd2122369745351102bc0c;p=mesa.git diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 70bb7564131..b195d7e2c39 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -43,16 +43,33 @@ #define BLOCK_WIDTH 8 #define BLOCK_HEIGHT 8 -struct vertex_shader_consts +struct vertex_stream { - struct vertex4f norm; + struct vertex2f pos; + struct { + float y; + float cr; + float cb; + } eb[2][2]; + float interlaced; + float frame_pred; + float ref_frames; + float bkwd_pred; + struct vertex2f mv[4]; }; enum VS_INPUT { VS_I_RECT, VS_I_VPOS, + VS_I_EB_0_0, + VS_I_EB_0_1, + VS_I_EB_1_0, + VS_I_EB_1_1, VS_I_INTERLACED, + VS_I_FRAME_PRED, + VS_I_REF_FRAMES, + VS_I_BKWD_PRED, VS_I_MV0, VS_I_MV1, VS_I_MV2, @@ -68,129 +85,160 @@ enum VS_OUTPUT VS_O_TEX0, VS_O_TEX1, VS_O_TEX2, + VS_O_EB_0, + VS_O_EB_1, + VS_O_INFO, VS_O_MV0, VS_O_MV1, VS_O_MV2, VS_O_MV3 }; -static const unsigned const_mbtype_config[VL_NUM_MACROBLOCK_TYPES][2] = { - [VL_MACROBLOCK_TYPE_INTRA] = { 0, 0 }, - [VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] = { 1, 1 }, - [VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] = { 1, 2 }, - [VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] = { 1, 1 }, - [VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] = { 1, 2 }, - [VL_MACROBLOCK_TYPE_BI_FRAME_PRED] = { 2, 1 }, - [VL_MACROBLOCK_TYPE_BI_FIELD_PRED] = { 2, 2 } -}; - static void * -create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame) +create_vert_shader(struct vl_mpeg12_mc_renderer *r) { struct ureg_program *shader; - struct ureg_src norm, mbs; - struct ureg_src vrect, vpos, interlaced, vmv[4]; - struct ureg_dst scale, t_vpos, t_vtex; - struct ureg_dst o_vpos, o_line, o_vtex[3], o_vmv[4]; - unsigned i, j, count, label; + struct ureg_src block_scale, mv_scale; + struct ureg_src vrect, vpos, eb[2][2], vmv[4]; + struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred; + struct ureg_dst t_vpos, t_vtex, t_vmv; + struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info; + unsigned i, label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; - norm = ureg_DECL_constant(shader, 0); - mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT); - - scale = ureg_DECL_temporary(shader); t_vpos = ureg_DECL_temporary(shader); t_vtex = ureg_DECL_temporary(shader); + t_vmv = ureg_DECL_temporary(shader); vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); + eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0); + eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0); + eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1); + eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1); interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED); + frame_pred = ureg_DECL_vs_input(shader, VS_I_FRAME_PRED); + ref_frames = ureg_DECL_vs_input(shader, VS_I_REF_FRAMES); + bkwd_pred = ureg_DECL_vs_input(shader, VS_I_BKWD_PRED); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); - o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE); + o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE); o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0); o_vtex[1] = 
ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1); - o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2); + o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2); + o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0); + o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1); + o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO); - count=0; - for (i = 0; i < ref_frames; ++i) { - for (j = 0; j < 2; ++j) { - if(j < mv_per_frame) { - vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + count); - o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count); - count++; - } - } + for (i = 0; i < 4; ++i) { + vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i); + o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i); } /* - * scale = norm * mbs; + * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height) + * mv_scale = 0.5 / (dst.width, dst.height); * - * t_vpos = (vpos + vrect) * scale + * t_vpos = (vpos + vrect) * block_scale * o_vpos.xy = t_vpos * o_vpos.zw = vpos * - * o_line = vpos * 8 + * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0] + * + * o_frame_pred = frame_pred + * o_info.x = ref_frames + * o_info.y = ref_frames > 0 + * o_info.z = bkwd_pred + * + * // Apply motion vectors + * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale + * + * o_line.xy = vrect * 8 + * o_line.z = interlaced * * if(interlaced) { * t_vtex.x = vrect.x * t_vtex.y = vrect.y * 0.5 * t_vtex += vpos * - * o_vtex[0].xy = t_vtex * scale + * o_vtex[0].xy = t_vtex * block_scale * * t_vtex.y += 0.5 - * o_vtex[1].xy = t_vtex * scale + * o_vtex[1].xy = t_vtex * block_scale * } else { * o_vtex[0..1].xy = t_vpos * } * o_vtex[2].xy = t_vpos * - * if(count > 0) { // Apply motion vectors - * scale = norm * 0.5; - * o_vmv[0..count] = t_vpos + vmv[0..count] * scale - * } - * */ - ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs); + block_scale = ureg_imm2f(shader, + (float)MACROBLOCK_WIDTH / r->buffer_width, + (float)MACROBLOCK_HEIGHT / r->buffer_height); + + mv_scale = ureg_imm2f(shader, + 0.5f / r->buffer_width, + 0.5f / r->buffer_height); ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect); - ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale)); + ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos); - ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_XY), vrect, - ureg_imm2f(shader, MACROBLOCK_WIDTH / 2, MACROBLOCK_HEIGHT / 2)); + ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), + eb[0][1], eb[0][0]); + ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ), + ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)), + eb[1][1], eb[1][0]); + + ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X), ref_frames); + ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y), + ureg_scalar(ref_frames, TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.0f)); + ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z), ureg_scalar(bkwd_pred, TGSI_SWIZZLE_X)); + + ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos)); + ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos)); + + 
ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)), + vmv[0], vmv[1]); + ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos)); + + ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)), + vmv[2], vmv[3]); + ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos)); + + ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + + ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y)); + ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), + vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2)); - ureg_IF(shader, interlaced, &label); + ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label); ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect); ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f)); ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex)); - ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale)); + ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale); ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f)); - ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale)); + ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale); - ureg_ELSE(shader, &label); - - ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); + ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), + ureg_scalar(vrect, TGSI_SWIZZLE_Y), + ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2)); + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); - ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos)); - - if(count > 0) { - ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f)); - for (i = 0; i < count; ++i) - ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos)); - } ureg_release_temporary(shader, t_vtex); ureg_release_temporary(shader, t_vpos); - ureg_release_temporary(shader, scale); + ureg_release_temporary(shader, t_vmv); ureg_END(shader); @@ -204,16 +252,19 @@ calc_field(struct ureg_program *shader) struct ureg_src line; tmp = ureg_DECL_temporary(shader); + line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR); /* - * line going from 0 to 8 in steps of 0.5 + * line.x going from 0 to 1 if not interlaced + * line.x going from 0 to 8 in steps of 0.5 if interlaced + * line.y going from 0 to 8 in steps of 0.5 * - * tmp.y = fraction(line.y) - * tmp.y = tmp.y >= 0.5 ? 1 : 0 + * tmp.xy = fraction(line) + * tmp.xy = tmp.xy >= 0.5 ? 
1 : 0 */ - ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line); - ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); + ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line); + ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f)); return tmp; } @@ -221,19 +272,21 @@ calc_field(struct ureg_program *shader) static struct ureg_dst fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field) { - struct ureg_src tc[3]; - struct ureg_src sampler[3]; - struct ureg_dst texel, t_tc, tmp; - unsigned i; + struct ureg_src tc[3], sampler[3], eb[2]; + struct ureg_dst texel, t_tc, t_eb_info; + unsigned i, label; texel = ureg_DECL_temporary(shader); t_tc = ureg_DECL_temporary(shader); - tmp = ureg_DECL_temporary(shader); + t_eb_info = ureg_DECL_temporary(shader); tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR); tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR); + eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT); + eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT); + for (i = 0; i < 3; ++i) { sampler[i] = ureg_DECL_sampler(shader, i); } @@ -243,107 +296,113 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct * texel.cb = tex(tc[2], sampler[1]) * texel.cr = tex(tc[2], sampler[2]) */ - for (i = 0; i < 3; ++i) { - if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) { - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), - ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), + ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)), tc[1], tc[0]); - } else { - ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]); - } + ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), + ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)), + eb[1], eb[0]); + + /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */ + ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f)); + + ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f)); + for (i = 0; i < 3; ++i) { + ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label); - /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */ - ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]); - ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); + /* Nouveau can't writemask tex dst regs (yet?), so this won't work anymore on nvidia hardware */ + if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) { + ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]); + } else { + ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]); + } + + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); } ureg_release_temporary(shader, t_tc); - ureg_release_temporary(shader, tmp); + ureg_release_temporary(shader, t_eb_info); return texel; } static struct ureg_dst -fetch_ref(struct ureg_program 
*shader, struct ureg_dst field, unsigned ref_frames, unsigned mv_per_frame) +fetch_ref(struct ureg_program *shader, struct ureg_dst field) { - struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames]; - struct ureg_dst ref[ref_frames], t_tc, result; - unsigned i, label; + struct ureg_src info; + struct ureg_src tc[4], sampler[2]; + struct ureg_dst ref[2], result; + unsigned i, intra_label, bi_label, label; + + info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT); - for (i = 0; i < ref_frames * mv_per_frame; ++i) + for (i = 0; i < 4; ++i) tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR); - for (i = 0; i < ref_frames; ++i) { + for (i = 0; i < 2; ++i) { sampler[i] = ureg_DECL_sampler(shader, i + 3); ref[i] = ureg_DECL_temporary(shader); } result = ureg_DECL_temporary(shader); - if (ref_frames == 1) { - if(mv_per_frame == 1) - /* - * result = tex(tc[0], sampler[0]) - */ - ureg_TEX(shader, result, TGSI_TEXTURE_2D, tc[0], sampler[0]); - else { - t_tc = ureg_DECL_temporary(shader); - /* - * result = tex(field.y ? tc[1] : tc[0], sampler[0]) - */ - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), - ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), - tc[1], tc[0]); - ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]); + ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f)); - ureg_release_temporary(shader, t_tc); - } + ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Y), &intra_label); + ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY), + ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), + tc[1], tc[0]); + + ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label); - } else if (ref_frames == 2) { - if(mv_per_frame == 1) { /* - * ref[0..1] = tex(tc[0..1], sampler[0..1]) + * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 
1 : 0]) */ - ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]); - ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]); - } else { - t_tc = ureg_DECL_temporary(shader); + ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label); + ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]); + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ELSE(shader, &label); + ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]); + ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + + ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader)); + ureg_ELSE(shader, &bi_label); /* - * if (field.y) + * if (field.z) * ref[0..1] = tex(tc[0..1], sampler[0..1]) * else * ref[0..1] = tex(tc[2..3], sampler[0..1]) */ - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), - ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), - tc[1], tc[0]); - ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]); - - ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), + ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[3], tc[2]); - ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]); + ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]); + ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]); - ureg_release_temporary(shader, t_tc); - } + ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f), + ureg_src(ref[0]), ureg_src(ref[1])); - ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1])); - } + ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); + ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader)); + ureg_ENDIF(shader); - for (i = 0; i < ref_frames; ++i) + for (i = 0; i < 2; ++i) ureg_release_temporary(shader, ref[i]); return result; } static void * -create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame) +create_frag_shader(struct vl_mpeg12_mc_renderer *r) { struct ureg_program *shader; - struct ureg_src result; + struct ureg_dst result; struct ureg_dst field, texel; struct ureg_dst fragment; @@ -356,102 +415,30 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne field = calc_field(shader); texel = fetch_ycbcr(r, shader, field); - if (ref_frames == 0) - result = ureg_imm1f(shader, 0.5f); - else - result = ureg_src(fetch_ref(shader, field, ref_frames, mv_per_frame)); + result = fetch_ref(shader, field); - ureg_ADD(shader, fragment, ureg_src(texel), result); + ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(texel), ureg_src(result)); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); + ureg_release_temporary(shader, result); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); } -static bool -init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type, - struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]) -{ - unsigned ref_frames, mv_per_frame; - struct vl_mc_mbtype_handler *handler; - unsigned i; - - assert(r); - - ref_frames = const_mbtype_config[type][0]; - mv_per_frame = const_mbtype_config[type][1]; - - handler = &r->mbtype_handlers[type]; - - handler->vs = create_vert_shader(r, ref_frames, mv_per_frame); - 
handler->fs = create_frag_shader(r, ref_frames, mv_per_frame); - - if (handler->vs == NULL || handler->fs == NULL) - return false; - - handler->vertex_elems_state = r->pipe->create_vertex_elements_state( - r->pipe, 3 + ref_frames * mv_per_frame, vertex_elems); - - if (handler->vertex_elems_state == NULL) - return false; - - if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch)) - return false; - - handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4); - if (handler->interlaced == NULL) - return false; - - for (i = 0; i < ref_frames * mv_per_frame; ++i) { - handler->mv[i] = MALLOC(sizeof(struct vertex2f) * r->macroblocks_per_batch * 4); - if (handler->mv[i] == NULL) - return false; - } - - return true; -} - -static void -cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type) -{ - unsigned ref_frames, mv_per_frame; - struct vl_mc_mbtype_handler *handler; - unsigned i; - - assert(r); - - ref_frames = const_mbtype_config[type][0]; - mv_per_frame = const_mbtype_config[type][1]; - - handler = &r->mbtype_handlers[type]; - - r->pipe->delete_vs_state(r->pipe, handler->vs); - r->pipe->delete_fs_state(r->pipe, handler->fs); - r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state); - - handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4); - FREE(handler->interlaced); - - for (i = 0; i < ref_frames * mv_per_frame; ++i) - FREE(handler->mv[i]); -} - - static bool init_pipe_state(struct vl_mpeg12_mc_renderer *r) { struct pipe_sampler_state sampler; + struct pipe_rasterizer_state rs_state; unsigned filters[5]; unsigned i; assert(r); - r->viewport.scale[0] = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - r->viewport.scale[1] = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; + r->viewport.scale[0] = r->buffer_width; + r->viewport.scale[1] = r->buffer_height; r->viewport.scale[2] = 1; r->viewport.scale[3] = 1; r->viewport.translate[0] = 0; @@ -459,10 +446,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) r->viewport.translate[2] = 0; r->viewport.translate[3] = 0; - r->fb_state.width = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - r->fb_state.height = r->pot_buffers ? 
- util_next_power_of_two(r->picture_height) : r->picture_height; + r->fb_state.width = r->buffer_width; + r->fb_state.height = r->buffer_height; r->fb_state.nr_cbufs = 1; r->fb_state.zsbuf = NULL; @@ -504,6 +489,14 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r) r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler); } + memset(&rs_state, 0, sizeof(rs_state)); + /*rs_state.sprite_coord_enable */ + rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; + rs_state.point_quad_rasterization = true; + rs_state.point_size = BLOCK_WIDTH; + rs_state.gl_rasterization_rules = true; + r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state); + return true; } @@ -516,274 +509,113 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r) for (i = 0; i < 5; ++i) r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]); + + r->pipe->delete_rasterizer_state(r->pipe, r->rs_state); } static bool init_buffers(struct vl_mpeg12_mc_renderer *r) { - struct pipe_resource template; + struct pipe_resource *idct_matrix; struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS]; - struct pipe_sampler_view sampler_view; const unsigned mbw = - align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; + align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; const unsigned mbh = - align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; + align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT; - unsigned i; + unsigned i, chroma_width, chroma_height; assert(r); r->macroblocks_per_batch = mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1); - r->num_macroblocks = 0; - memset(&template, 0, sizeof(struct pipe_resource)); - template.target = PIPE_TEXTURE_2D; - /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width0 = r->pot_buffers ? - util_next_power_of_two(r->picture_width) : r->picture_width; - template.height0 = r->pot_buffers ? - util_next_power_of_two(r->picture_height) : r->picture_height; - template.depth0 = 1; - template.usage = PIPE_USAGE_DYNAMIC; - template.bind = PIPE_BIND_SAMPLER_VIEW; - template.flags = 0; + if (!(idct_matrix = vl_idct_upload_matrix(r->pipe))) + return false; - r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template); + if (!vl_idct_init(&r->idct_luma, r->pipe, r->buffer_width, r->buffer_height, idct_matrix)) + return false; if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { - template.width0 = r->pot_buffers ? - util_next_power_of_two(r->picture_width / 2) : - r->picture_width / 2; - template.height0 = r->pot_buffers ? - util_next_power_of_two(r->picture_height / 2) : - r->picture_height / 2; - } - else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) - template.height0 = r->pot_buffers ? 
- util_next_power_of_two(r->picture_height / 2) : - r->picture_height / 2; - - r->textures.individual.cb = - r->pipe->screen->resource_create(r->pipe->screen, &template); - r->textures.individual.cr = - r->pipe->screen->resource_create(r->pipe->screen, &template); - - for (i = 0; i < 3; ++i) { - u_sampler_view_default_template(&sampler_view, - r->textures.all[i], - r->textures.all[i]->format); - r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view); + chroma_width = r->buffer_width / 2; + chroma_height = r->buffer_height / 2; + } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { + chroma_width = r->buffer_width; + chroma_height = r->buffer_height / 2; + } else { + chroma_width = r->buffer_width; + chroma_height = r->buffer_height; } - r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch); - - r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f); - r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1; - r->vertex_bufs.individual.ycbcr.buffer_offset = 0; - /* XXX: Create with usage DYNAMIC or STREAM */ - r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create - ( - r->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch - ); - - r->vertex_bufs.individual.interlaced.stride = sizeof(float); - r->vertex_bufs.individual.interlaced.max_index = 4 * r->macroblocks_per_batch - 1; - r->vertex_bufs.individual.interlaced.buffer_offset = 0; - /* XXX: Create with usage DYNAMIC or STREAM */ - r->vertex_bufs.individual.interlaced.buffer = pipe_buffer_create - ( - r->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(float) * 4 * r->macroblocks_per_batch - ); - - for (i = 0; i < 4; ++i) { - r->vertex_bufs.individual.mv[i].stride = sizeof(struct vertex2f); - r->vertex_bufs.individual.mv[i].max_index = 4 * r->macroblocks_per_batch - 1; - r->vertex_bufs.individual.mv[i].buffer_offset = 0; - /* XXX: Create with usage DYNAMIC or STREAM */ - r->vertex_bufs.individual.mv[i].buffer = pipe_buffer_create - ( - r->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch - ); - } + if(!vl_idct_init(&r->idct_chroma, r->pipe, chroma_width, chroma_height, idct_matrix)) + return false; memset(&vertex_elems, 0, sizeof(vertex_elems)); - /* Rectangle element */ - vertex_elems[VS_I_RECT].src_offset = 0; - vertex_elems[VS_I_RECT].instance_divisor = 0; - vertex_elems[VS_I_RECT].vertex_buffer_index = 0; - vertex_elems[VS_I_RECT].src_format = PIPE_FORMAT_R32G32_FLOAT; + vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch); /* Position element */ - vertex_elems[VS_I_VPOS].src_offset = 0; - vertex_elems[VS_I_VPOS].instance_divisor = 0; - vertex_elems[VS_I_VPOS].vertex_buffer_index = 1; vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* progressive=1.0f interlaced=0.0f */ - vertex_elems[VS_I_INTERLACED].src_offset = 0; - vertex_elems[VS_I_INTERLACED].instance_divisor = 0; - vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 2; + /* y, cr, cb empty block element top left block */ + vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + + /* y, cr, cb empty block element top right block */ + vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + + /* y, cr, cb empty block element bottom left block */ + vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + + /* y, cr, cb empty block element 
bottom right block */ + vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + + /* progressive=0.0f interlaced=1.0f */ vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT; - /* First ref surface top field texcoord element */ - vertex_elems[VS_I_MV0].src_offset = 0; - vertex_elems[VS_I_MV0].instance_divisor = 0; - vertex_elems[VS_I_MV0].vertex_buffer_index = 3; - vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* First ref surface bottom field texcoord element */ - vertex_elems[VS_I_MV1].src_offset = 0; - vertex_elems[VS_I_MV1].instance_divisor = 0; - vertex_elems[VS_I_MV1].vertex_buffer_index = 4; - vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface top field texcoord element */ - vertex_elems[VS_I_MV2].src_offset = 0; - vertex_elems[VS_I_MV2].instance_divisor = 0; - vertex_elems[VS_I_MV2].vertex_buffer_index = 5; - vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Second ref surface bottom field texcoord element */ - vertex_elems[VS_I_MV3].src_offset = 0; - vertex_elems[VS_I_MV3].instance_divisor = 0; - vertex_elems[VS_I_MV3].vertex_buffer_index = 6; - vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT; - - for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) - init_mbtype_handler(r, i, vertex_elems); - - r->vs_const_buf = pipe_buffer_create - ( - r->pipe->screen, - PIPE_BIND_CONSTANT_BUFFER, - sizeof(struct vertex_shader_consts) - ); + /* frame=0.0f field=1.0f */ + vertex_elems[VS_I_FRAME_PRED].src_format = PIPE_FORMAT_R32_FLOAT; - return true; -} + /* intra=-1.0f forward/backward=1.0f bi=0.0f */ + vertex_elems[VS_I_REF_FRAMES].src_format = PIPE_FORMAT_R32_FLOAT; -static void -cleanup_buffers(struct vl_mpeg12_mc_renderer *r) -{ - unsigned i; + /* forward=0.0f backward=1.0f */ + vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT; - assert(r); + for (i = 0; i < 4; ++i) + /* motion vector 0..4 element */ + vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT; - pipe_resource_reference(&r->vs_const_buf, NULL); + r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 13, 1); - for (i = 0; i < 3; ++i) { - pipe_sampler_view_reference(&r->sampler_views.all[i], NULL); - pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL); - pipe_resource_reference(&r->textures.all[i], NULL); - } + r->vertex_elems_state = r->pipe->create_vertex_elements_state( + r->pipe, NUM_VS_INPUTS, vertex_elems); - for(i = 0; ivertex_elems_state == NULL) + return false; -static enum VL_MACROBLOCK_TYPE -get_macroblock_type(struct pipe_mpeg12_macroblock *mb) -{ - assert(mb); + r->vs = create_vert_shader(r); + r->fs = create_frag_shader(r); - switch (mb->mb_type) { - case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: - return VL_MACROBLOCK_TYPE_INTRA; - case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? - VL_MACROBLOCK_TYPE_FWD_FRAME_PRED : VL_MACROBLOCK_TYPE_FWD_FIELD_PRED; - case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? - VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED : VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED; - case PIPE_MPEG12_MACROBLOCK_TYPE_BI: - return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 
- VL_MACROBLOCK_TYPE_BI_FRAME_PRED : VL_MACROBLOCK_TYPE_BI_FIELD_PRED; - default: - assert(0); - } + if (r->vs == NULL || r->fs == NULL) + return false; - /* Unreachable */ - return -1; + return true; } static void -upload_vertex_stream(struct vl_mpeg12_mc_renderer *r, - unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES]) +cleanup_buffers(struct vl_mpeg12_mc_renderer *r) { - struct quadf *pos; - struct vertex2f *mv[4]; - float *interlaced; - - struct pipe_transfer *buf_transfer[7]; - - unsigned i, j; - assert(r); - assert(num_macroblocks); - pos = (struct quadf *)pipe_buffer_map - ( - r->pipe, - r->vertex_bufs.individual.ycbcr.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer[0] - ); - - interlaced = (float *)pipe_buffer_map - ( - r->pipe, - r->vertex_bufs.individual.interlaced.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer[1] - ); - - for (i = 0; i < 4; ++i) - mv[i] = (struct vertex2f *)pipe_buffer_map - ( - r->pipe, - r->vertex_bufs.individual.mv[i].buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer[i + 2] - ); + r->pipe->delete_vs_state(r->pipe, r->vs); + r->pipe->delete_fs_state(r->pipe, r->fs); - for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) { - struct vl_mc_mbtype_handler *handler = &r->mbtype_handlers[i]; - unsigned count = vl_vb_upload(&handler->pos, pos); - if (count > 0) { - unsigned ref_frames, mv_per_frame; + vl_idct_cleanup(&r->idct_luma); + vl_idct_cleanup(&r->idct_chroma); - ref_frames = const_mbtype_config[i][0]; - mv_per_frame = const_mbtype_config[i][1]; - - pos += count; - - memcpy(interlaced, handler->interlaced, sizeof(float) * count * 4); - interlaced += count * 4; - - for (j = 0; j < ref_frames * mv_per_frame; ++j) - memcpy(mv[j], handler->mv[j], sizeof(struct vertex2f) * count * 4); - - for (j = 0; j < 4; ++j) - mv[j] += count * 4; - } - num_macroblocks[i] = count; - } - - pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]); - pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.interlaced.buffer, buf_transfer[1]); - for (i = 0; i < 4; ++i) - pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 2]); + r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state); } static struct pipe_sampler_view @@ -813,104 +645,6 @@ static struct pipe_sampler_view return sampler_view; } -static unsigned -flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type, - unsigned vb_start, unsigned num_macroblocks) -{ - unsigned ref_frames, mv_per_frame; - struct vl_mc_mbtype_handler *handler; - - assert(r); - - ref_frames = const_mbtype_config[type][0]; - mv_per_frame = const_mbtype_config[type][1]; - - handler = &r->mbtype_handlers[type]; - - r->pipe->set_vertex_buffers(r->pipe, 3 + ref_frames * mv_per_frame, r->vertex_bufs.all); - r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state); - - if(ref_frames == 2) { - - r->textures.individual.ref[0] = r->past->texture; - r->textures.individual.ref[1] = r->future->texture; - r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past); - r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future); - - } else if(ref_frames == 1) { - - struct pipe_surface *ref; - - if(type == VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED || - type == VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED) - ref = r->future; - else - ref = r->past; - - r->textures.individual.ref[0] = ref->texture; - r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, ref); - } - - 
r->pipe->set_fragment_sampler_views(r->pipe, 3 + ref_frames, r->sampler_views.all); - r->pipe->bind_fragment_sampler_states(r->pipe, 3 + ref_frames, r->samplers.all); - r->pipe->bind_vs_state(r->pipe, handler->vs); - r->pipe->bind_fs_state(r->pipe, handler->fs); - - util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start, num_macroblocks * 4); - return num_macroblocks * 4; -} - -static void -flush(struct vl_mpeg12_mc_renderer *r) -{ - unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES] = { 0 }; - unsigned vb_start = 0, i; - - assert(r); - assert(r->num_macroblocks == r->macroblocks_per_batch); - - vl_idct_flush(&r->idct_y); - vl_idct_flush(&r->idct_cr); - vl_idct_flush(&r->idct_cb); - - upload_vertex_stream(r, num_macroblocks); - - r->pipe->set_framebuffer_state(r->pipe, &r->fb_state); - r->pipe->set_viewport_state(r->pipe, &r->viewport); - - for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) { - if (num_macroblocks[i] > 0) - vb_start += flush_mbtype_handler(r, i, vb_start, num_macroblocks[i]); - } - - r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence); - - r->num_macroblocks = 0; -} - -static void -update_render_target(struct vl_mpeg12_mc_renderer *r) -{ - struct pipe_transfer *buf_transfer; - struct vertex_shader_consts *vs_consts; - - vs_consts = pipe_buffer_map - ( - r->pipe, r->vs_const_buf, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer - ); - - vs_consts->norm.x = 1.0f / r->surface->width; - vs_consts->norm.y = 1.0f / r->surface->height; - - pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer); - - r->fb_state.cbufs[0] = r->surface; - - r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf); -} - static void get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4]) { @@ -919,8 +653,8 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4]) { if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) { - mv[1].x = mb->pmv[0][1][0]; - mv[1].y = mb->pmv[0][1][1]; + mv[2].x = mb->pmv[0][1][0]; + mv[2].y = mb->pmv[0][1][1]; } else { mv[2].x = mb->pmv[0][1][0]; @@ -978,40 +712,71 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4]) } } +static bool +empty_block(enum pipe_video_chroma_format chroma_format, + unsigned cbp, unsigned component, + unsigned x, unsigned y) +{ + /* TODO: Implement 422, 444 */ + assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); + + if(component == 0) /*luma*/ + return !(cbp & (1 << (5 - (x + y * 2)))); + else /*cr cb*/ + return !(cbp & (1 << (2 - component))); +} + static void grab_vectors(struct vl_mpeg12_mc_renderer *r, + struct vl_mpeg12_mc_buffer *buffer, struct pipe_mpeg12_macroblock *mb) { - enum VL_MACROBLOCK_TYPE type; - struct vl_mc_mbtype_handler *handler; - struct vertex2f mv[4]; - unsigned ref_frames, mv_per_frame; - unsigned i, j, pos; + struct vertex_stream stream; + + unsigned i, j; assert(r); assert(mb); - type = get_macroblock_type(mb); - - ref_frames = const_mbtype_config[type][0]; - mv_per_frame = const_mbtype_config[type][1]; - - handler = &r->mbtype_handlers[type]; + stream.pos.x = mb->mbx; + stream.pos.y = mb->mby; + for ( i = 0; i < 2; ++i) { + for ( j = 0; j < 2; ++j) { + stream.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i); + stream.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i); + stream.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i); + } + } + stream.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f; + stream.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 
1.0f : 0.0f; + stream.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f; + switch (mb->mb_type) { + case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA: + stream.ref_frames = -1.0f; + break; - pos = handler->pos.num_blocks * 4; - vl_vb_add_block(&handler->pos, false, mb->mbx, mb->mby); + case PIPE_MPEG12_MACROBLOCK_TYPE_FWD: + case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD: + stream.ref_frames = 1.0f; + break; + + case PIPE_MPEG12_MACROBLOCK_TYPE_BI: + stream.ref_frames = 0.0f; + break; - get_motion_vectors(mb, mv); - for ( i = 0; i < 4; ++i ) { - handler->interlaced[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f; - for ( j = 0; j < ref_frames * mv_per_frame; ++j ) - handler->mv[j][i + pos] = mv[j]; + default: + assert(0); } + + get_motion_vectors(mb, stream.mv); + vl_vb_add_block(&buffer->vertex_stream, (float*)&stream); } static void -grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, - enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) +grab_blocks(struct vl_mpeg12_mc_renderer *r, + struct vl_mpeg12_mc_buffer *buffer, + unsigned mbx, unsigned mby, + unsigned cbp, short *blocks) { unsigned tb = 0; unsigned x, y; @@ -1021,38 +786,41 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) { - bool eb = !(cbp & (1 << (5 - tb))); - vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks); - blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT; + if (!empty_block(r->chroma_format, cbp, 0, x, y)) { + vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks); + blocks += BLOCK_WIDTH * BLOCK_HEIGHT; + } } } /* TODO: Implement 422, 444 */ assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); - for (tb = 0; tb < 2; ++tb) { - bool eb = !(cbp & (1 << (1 - tb))); - if(tb == 0) - vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks); - else - vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks); - blocks += eb ? 
0 : BLOCK_WIDTH * BLOCK_HEIGHT; + for (tb = 1; tb < 3; ++tb) { + if (!empty_block(r->chroma_format, cbp, tb, 0, 0)) { + if(tb == 1) + vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks); + else + vl_idct_add_block(&buffer->idct_cr, mbx, mby, blocks); + blocks += BLOCK_WIDTH * BLOCK_HEIGHT; + } } } static void grab_macroblock(struct vl_mpeg12_mc_renderer *r, + struct vl_mpeg12_mc_buffer *buffer, struct pipe_mpeg12_macroblock *mb) { assert(r); assert(mb); assert(mb->blocks); - assert(r->num_macroblocks < r->macroblocks_per_batch); + assert(buffer->num_macroblocks < r->macroblocks_per_batch); - grab_vectors(r, mb); - grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks); + grab_vectors(r, buffer, mb); + grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks); - ++r->num_macroblocks; + ++buffer->num_macroblocks; } static void @@ -1073,30 +841,24 @@ texview_map_delete(const struct keymap *map, bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe, - unsigned picture_width, - unsigned picture_height, + unsigned buffer_width, + unsigned buffer_height, enum pipe_video_chroma_format chroma_format, - enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode, - bool pot_buffers) + enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode) { - struct pipe_resource *idct_matrix; - assert(renderer); assert(pipe); /* TODO: Implement other policies */ assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE); - /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */ - assert(pot_buffers); memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer)); renderer->pipe = pipe; - renderer->picture_width = picture_width; - renderer->picture_height = picture_height; + renderer->buffer_width = buffer_width; + renderer->buffer_height = buffer_height; renderer->chroma_format = chroma_format; renderer->bufmode = bufmode; - renderer->pot_buffers = pot_buffers; renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1, texview_map_delete); @@ -1109,35 +871,8 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer, if (!init_buffers(renderer)) goto error_buffers; - renderer->surface = NULL; - renderer->past = NULL; - renderer->future = NULL; - renderer->num_macroblocks = 0; - - if(!(idct_matrix = vl_idct_upload_matrix(pipe))) - goto error_idct_matrix; - - if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y, idct_matrix)) - goto error_idct_y; - - if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr, idct_matrix)) - goto error_idct_cr; - - if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb, idct_matrix)) - goto error_idct_cb; - return true; -error_idct_cb: - vl_idct_cleanup(&renderer->idct_cr); - -error_idct_cr: - vl_idct_cleanup(&renderer->idct_y); - -error_idct_y: -error_idct_matrix: - cleanup_buffers(renderer); - error_buffers: cleanup_pipe_state(renderer); @@ -1151,79 +886,235 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer) { assert(renderer); - vl_idct_cleanup(&renderer->idct_y); - vl_idct_cleanup(&renderer->idct_cr); - vl_idct_cleanup(&renderer->idct_cb); - util_delete_keymap(renderer->texview_map, renderer->pipe); cleanup_pipe_state(renderer); cleanup_buffers(renderer); +} - pipe_surface_reference(&renderer->surface, NULL); - pipe_surface_reference(&renderer->past, NULL); - pipe_surface_reference(&renderer->future, NULL); +bool +vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, 
struct vl_mpeg12_mc_buffer *buffer) +{ + struct pipe_resource template; + struct pipe_sampler_view sampler_view; + + unsigned i; + + assert(renderer && buffer); + + buffer->surface = NULL; + buffer->past = NULL; + buffer->future = NULL; + buffer->num_macroblocks = 0; + + memset(&template, 0, sizeof(struct pipe_resource)); + template.target = PIPE_TEXTURE_2D; + /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */ + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width0 = renderer->buffer_width; + template.height0 = renderer->buffer_height; + template.depth0 = 1; + template.usage = PIPE_USAGE_DYNAMIC; + template.bind = PIPE_BIND_SAMPLER_VIEW; + template.flags = 0; + + buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template); + + if (!vl_idct_init_buffer(&renderer->idct_luma, &buffer->idct_y, buffer->textures.individual.y)) + return false; + + if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { + template.width0 = renderer->buffer_width / 2; + template.height0 = renderer->buffer_height / 2; + } + else if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) + template.height0 = renderer->buffer_height / 2; + + buffer->textures.individual.cb = + renderer->pipe->screen->resource_create(renderer->pipe->screen, &template); + buffer->textures.individual.cr = + renderer->pipe->screen->resource_create(renderer->pipe->screen, &template); + + if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cb, buffer->textures.individual.cb)) + return false; + + if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cr, buffer->textures.individual.cr)) + return false; + + for (i = 0; i < 3; ++i) { + u_sampler_view_default_template(&sampler_view, + buffer->textures.all[i], + buffer->textures.all[i]->format); + sampler_view.swizzle_r = i == 0 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO; + sampler_view.swizzle_g = i == 1 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO; + sampler_view.swizzle_b = i == 2 ? 
PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO; + sampler_view.swizzle_a = PIPE_SWIZZLE_ONE; + buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view( + renderer->pipe, buffer->textures.all[i], &sampler_view); + } + + buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride; + buffer->vertex_bufs.individual.quad.max_index = renderer->quad.max_index; + buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset; + pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer); + + buffer->vertex_bufs.individual.stream = vl_vb_init( + &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch, + sizeof(struct vertex_stream) / sizeof(float), + renderer->vertex_stream_stride); + + return true; +} + +void +vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer) +{ + unsigned i; + + assert(renderer && buffer); + + for (i = 0; i < 3; ++i) { + pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL); + pipe_resource_reference(&buffer->vertex_bufs.all[i].buffer, NULL); + pipe_resource_reference(&buffer->textures.all[i], NULL); + } + + pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL); + vl_vb_cleanup(&buffer->vertex_stream); + + vl_idct_cleanup_buffer(&renderer->idct_luma, &buffer->idct_y); + vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cb); + vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cr); + + pipe_surface_reference(&buffer->surface, NULL); + pipe_surface_reference(&buffer->past, NULL); + pipe_surface_reference(&buffer->future, NULL); +} + +void +vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer) +{ + assert(renderer && buffer); + + vl_idct_map_buffers(&renderer->idct_luma, &buffer->idct_y); + vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cr); + vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cb); + + vl_vb_map(&buffer->vertex_stream, renderer->pipe); } void -vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer - *renderer, +vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer, + struct vl_mpeg12_mc_buffer *buffer, struct pipe_surface *surface, struct pipe_surface *past, struct pipe_surface *future, unsigned num_macroblocks, - struct pipe_mpeg12_macroblock - *mpeg12_macroblocks, + struct pipe_mpeg12_macroblock *mpeg12_macroblocks, struct pipe_fence_handle **fence) { - bool new_surface = false; - - assert(renderer); + assert(renderer && buffer); assert(surface); assert(num_macroblocks); assert(mpeg12_macroblocks); - if (renderer->surface) { - if (surface != renderer->surface) { - if (renderer->num_macroblocks > 0) { - flush(renderer); - } - - new_surface = true; - } - + if (surface != buffer->surface) { + pipe_surface_reference(&buffer->surface, surface); + pipe_surface_reference(&buffer->past, past); + pipe_surface_reference(&buffer->future, future); + buffer->fence = fence; + } else { /* If the surface we're rendering hasn't changed the ref frames shouldn't change. 
*/ - assert(surface != renderer->surface || renderer->past == past); - assert(surface != renderer->surface || renderer->future == future); - } - else - new_surface = true; - - if (new_surface) { - pipe_surface_reference(&renderer->surface, surface); - pipe_surface_reference(&renderer->past, past); - pipe_surface_reference(&renderer->future, future); - renderer->fence = fence; - update_render_target(renderer); + assert(buffer->past == past); + assert(buffer->future == future); } while (num_macroblocks) { - unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks; + unsigned left_in_batch = renderer->macroblocks_per_batch - buffer->num_macroblocks; unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch); unsigned i; for (i = 0; i < num_to_submit; ++i) { assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12); - grab_macroblock(renderer, &mpeg12_macroblocks[i]); + grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]); } num_macroblocks -= num_to_submit; - if (renderer->num_macroblocks == renderer->macroblocks_per_batch) { - flush(renderer); - /* Next time we get this surface it may have new ref frames */ - pipe_surface_reference(&renderer->surface, NULL); - pipe_surface_reference(&renderer->past, NULL); - pipe_surface_reference(&renderer->future, NULL); + if (buffer->num_macroblocks == renderer->macroblocks_per_batch) { + vl_mpeg12_mc_unmap_buffer(renderer, buffer); + vl_mpeg12_mc_renderer_flush(renderer, buffer); + pipe_surface_reference(&buffer->surface, surface); + pipe_surface_reference(&buffer->past, past); + pipe_surface_reference(&buffer->future, future); + vl_mpeg12_mc_map_buffer(renderer, buffer); } } } + +void +vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer) +{ + assert(renderer && buffer); + + vl_idct_unmap_buffers(&renderer->idct_luma, &buffer->idct_y); + vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cr); + vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cb); + + vl_vb_unmap(&buffer->vertex_stream, renderer->pipe); +} + +void +vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer) +{ + assert(renderer && buffer); + assert(buffer->num_macroblocks <= renderer->macroblocks_per_batch); + + if (buffer->num_macroblocks == 0) + return; + + vl_idct_flush(&renderer->idct_luma, &buffer->idct_y); + vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cr); + vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cb); + + vl_vb_restart(&buffer->vertex_stream); + + renderer->fb_state.cbufs[0] = buffer->surface; + renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state); + renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state); + renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport); + renderer->pipe->set_vertex_buffers(renderer->pipe, 2, buffer->vertex_bufs.all); + renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state); + + if (buffer->past) { + buffer->textures.individual.ref[0] = buffer->past->texture; + buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past); + } else { + buffer->textures.individual.ref[0] = buffer->surface->texture; + buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface); + } + + if (buffer->future) { + buffer->textures.individual.ref[1] = buffer->future->texture; + buffer->sampler_views.individual.ref[1] = 
find_or_create_sampler_view(renderer, buffer->future); + } else { + buffer->textures.individual.ref[1] = buffer->surface->texture; + buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface); + } + + renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all); + renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all); + + renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs); + renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs); + util_draw_arrays(renderer->pipe, PIPE_PRIM_QUADS, 0, buffer->num_macroblocks * 4); + + renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence); + + /* Next time we get this surface it may have new ref frames */ + pipe_surface_reference(&buffer->surface, NULL); + pipe_surface_reference(&buffer->past, NULL); + pipe_surface_reference(&buffer->future, NULL); + + buffer->num_macroblocks = 0; +}
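
Note on the per-block empty flags: the new vertex stream carries eb[0..1][0..1] flags that the vertex shader forwards to the fragment shader so that texel fetches can be skipped for blocks the bitstream did not code. They are derived from the macroblock's coded block pattern by empty_block(). The following standalone sketch (not part of the patch; all names are invented for illustration) shows that mapping for 4:2:0, assuming the standard MPEG-2 bit layout: bits 5..2 are the four luma blocks in raster order, bit 1 is Cb, bit 0 is Cr.

#include <stdbool.h>
#include <stdio.h>

/* Luma block (x, y), x/y in {0, 1}: the block index is x + 2 * y,
 * and coded block i corresponds to cbp bit (5 - i). */
static bool luma_block_empty(unsigned cbp, unsigned x, unsigned y)
{
   return !(cbp & (1u << (5 - (x + y * 2))));
}

/* Chroma in 4:2:0: Cb is block 4 (bit 1), Cr is block 5 (bit 0). */
static bool cb_block_empty(unsigned cbp) { return !(cbp & (1u << 1)); }
static bool cr_block_empty(unsigned cbp) { return !(cbp & 1u); }

int main(void)
{
   unsigned cbp = 0x2A; /* 101010b: Y0, Y2 and Cb coded, the rest empty */
   unsigned x, y;

   for (y = 0; y < 2; ++y)
      for (x = 0; x < 2; ++x)
         printf("Y(%u,%u): %s\n", x, y,
                luma_block_empty(cbp, x, y) ? "empty" : "coded");

   printf("Cb: %s\nCr: %s\n",
          cb_block_empty(cbp) ? "empty" : "coded",
          cr_block_empty(cbp) ? "empty" : "coded");
   return 0;
}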
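
Note on the scaling constants: create_vert_shader() now bakes block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (buffer_width, buffer_height) and mv_scale = 0.5 / (buffer_width, buffer_height) into the shader as immediates, replacing the old vertex-shader constant buffer. The 0.5 factor reflects that MPEG-2 motion vectors are given in half-sample units, so mv * mv_scale is an offset in normalized texture coordinates that can be added to the block position. A minimal CPU-side sketch of the same arithmetic, using made-up buffer dimensions and vector values:

#include <stdio.h>

#define MACROBLOCK_WIDTH  16
#define MACROBLOCK_HEIGHT 16

int main(void)
{
   const float buffer_width = 1920.0f, buffer_height = 1088.0f; /* example size */

   /* Per vertex the shader computes:
    *   t_vpos = (vpos + vrect) * block_scale
    *   o_vmv  = t_vpos + mv * mv_scale
    */
   const float block_scale_x = (float)MACROBLOCK_WIDTH  / buffer_width;
   const float block_scale_y = (float)MACROBLOCK_HEIGHT / buffer_height;
   const float mv_scale_x = 0.5f / buffer_width;
   const float mv_scale_y = 0.5f / buffer_height;

   const float mbx = 3.0f, mby = 2.0f;     /* macroblock position (vpos)    */
   const float rectx = 1.0f, recty = 0.0f; /* quad corner (vrect), 0..1     */
   const float mvx = -7.0f, mvy = 4.0f;    /* motion vector, half-pel units */

   const float tx = (mbx + rectx) * block_scale_x;
   const float ty = (mby + recty) * block_scale_y;

   printf("block corner : (%f, %f)\n", tx, ty);
   printf("ref texcoord : (%f, %f)\n",
          tx + mvx * mv_scale_x, ty + mvy * mv_scale_y);
   return 0;
}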