Using a separate vertex buffer for mc and ycbcr handling is still better.
create_vert_shader(struct vl_idct *idct, bool matrix_stage)
{
struct ureg_program *shader;
- struct ureg_src vrect, vpos, vblock, eb;
- struct ureg_src scale, blocks_xy;
+ struct ureg_src vrect, vpos;
+ struct ureg_src scale;
struct ureg_dst t_tex, t_start;
struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
- unsigned label;
shader = ureg_create(TGSI_PROCESSOR_VERTEX);
if (!shader)
vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
- vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
- eb = ureg_DECL_vs_input(shader, VS_I_EB);
-
o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
/*
* scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
- * blocks_xy = (blocks_x, blocks_y)
- *
- * if eb.(vblock.y, vblock.x)
- * o_vpos.xy = -1
- * else
- * t_tex = vpos * blocks_xy + vblock
- * t_start = t_tex * scale
- * t_tex = t_tex + vrect
- * o_vpos.xy = t_tex * scale
*
- * o_l_addr = calc_addr(...)
- * o_r_addr = calc_addr(...)
- * endif
+ * t_vpos = vpos + vrect
+ * o_vpos.xy = t_vpos * scale
* o_vpos.zw = vpos
*
+ * o_l_addr = calc_addr(...)
+ * o_r_addr = calc_addr(...)
+ *
*/
scale = ureg_imm2f(shader,
(float)BLOCK_WIDTH / idct->buffer_width,
(float)BLOCK_HEIGHT / idct->buffer_height);
- blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
+ ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
+ ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+ ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
+ ureg_scalar(vrect, TGSI_SWIZZLE_X),
+ ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
- if (idct->blocks_x > 1 || idct->blocks_y > 1) {
- ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
- ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
- ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
- ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
+ ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
+ ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
- ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
- ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
- ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
- ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));
+ ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
- eb = ureg_src(t_tex);
+ if(matrix_stage) {
+ calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+ calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+ } else {
+ calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+ calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
}
- ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);
-
- ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
-
- ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
- ureg_ELSE(shader, &label);
-
- ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
- ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-
- ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);
-
- ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
- ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
- ureg_scalar(vrect, TGSI_SWIZZLE_X),
- ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
-
- ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
-
- if(matrix_stage) {
- calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
- calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
- } else {
- calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
- calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
- }
-
- ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
- ureg_ENDIF(shader);
-
- ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
-
ureg_release_temporary(shader, t_tex);
ureg_release_temporary(shader, t_start);
bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
unsigned buffer_width, unsigned buffer_height,
- unsigned blocks_x, unsigned blocks_y,
unsigned nr_of_render_targets,
struct pipe_sampler_view *matrix,
struct pipe_sampler_view *transpose)
idct->pipe = pipe;
idct->buffer_width = buffer_width;
idct->buffer_height = buffer_height;
- idct->blocks_x = blocks_x;
- idct->blocks_y = blocks_y;
idct->nr_of_render_targets = nr_of_render_targets;
pipe_sampler_view_reference(&idct->matrix, matrix);
void
vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
{
- unsigned num_verts;
-
assert(idct);
assert(buffer);
if(num_instances > 0) {
- num_verts = idct->blocks_x * idct->blocks_y * 4;
-
idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
idct->pipe->bind_blend_state(idct->pipe, idct->blend);
idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
- util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
+ util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
/* second stage */
idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
- util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
+ util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
}
}
unsigned buffer_width;
unsigned buffer_height;
- unsigned blocks_x, blocks_y;
unsigned nr_of_render_targets;
void *rs_state;
/* init an idct instance */
bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
unsigned buffer_width, unsigned buffer_height,
- unsigned blocks_x, unsigned blocks_y,
unsigned nr_of_render_targets,
struct pipe_sampler_view *matrix,
struct pipe_sampler_view *transpose);
enum VS_OUTPUT
{
VS_O_VPOS,
- VS_O_LINE,
VS_O_VTOP,
VS_O_VBOTTOM
};
static struct ureg_dst
-calc_position(struct vl_mc *r, struct ureg_program *shader)
+calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale)
{
- struct ureg_src block_scale;
struct ureg_src vrect, vpos;
struct ureg_dst t_vpos;
struct ureg_dst o_vpos;
* o_vpos.xy = t_vpos
* o_vpos.zw = vpos
*/
- block_scale = ureg_imm2f(shader,
- (float)MACROBLOCK_WIDTH / r->buffer_width,
- (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
- ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+ ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
return t_vpos;
}
-static void *
-create_ycbcr_vert_shader(struct vl_mc *r)
+static struct ureg_dst
+calc_line(struct ureg_program *shader)
{
- struct ureg_program *shader;
- struct ureg_src block_scale;
- struct ureg_src vrect, vpos, eb, flags;
- struct ureg_dst t_vpos, t_vtex;
- struct ureg_dst o_line, o_vtex[2];
- unsigned label;
-
- shader = ureg_create(TGSI_PROCESSOR_VERTEX);
- if (!shader)
- return NULL;
-
- vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
- vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
- eb = ureg_DECL_vs_input(shader, VS_I_EB);
- flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
+ struct ureg_dst tmp;
+ struct ureg_src pos;
- t_vpos = calc_position(r, shader);
- t_vtex = ureg_DECL_temporary(shader);
+ tmp = ureg_DECL_temporary(shader);
- o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
- o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
- o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+ pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);
/*
- * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
- *
- * o_line.x = interlaced
- * o_line.y = vrect
- *
- * o_vtex[0].z = vrect.x ? eb.y : eb.x
- * o_vtex[1].z = vrect.x ? eb.w : eb.z
- *
- * if(interlaced) {
- * t_vtex.x = vrect.x
- * t_vtex.y = vrect.y * 0.5
- * t_vtex += vpos
- *
- * o_vtex[0].xy = t_vtex * block_scale
- *
- * t_vtex.y += 0.5
- * o_vtex[1].xy = t_vtex * block_scale
- * } else {
- * o_vtex[0..1].xy = t_vpos
- * }
- * o_vtex[2].xy = t_vpos
- *
+ * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
*/
- block_scale = ureg_imm2f(shader,
- (float)MACROBLOCK_WIDTH / r->buffer_width,
- (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
- ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f));
- ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect);
-
- ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
- ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
- ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
- ureg_scalar(eb, TGSI_SWIZZLE_Y),
- ureg_scalar(eb, TGSI_SWIZZLE_X));
-
- ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
- ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
- ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
- ureg_scalar(eb, TGSI_SWIZZLE_W),
- ureg_scalar(eb, TGSI_SWIZZLE_Z));
-
- if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
- ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
-
- ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
- ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
- ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
- ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
- ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
- ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-
- ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
- ureg_scalar(vrect, TGSI_SWIZZLE_Y),
- ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-
- ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
- ureg_ENDIF(shader);
- }
-
- ureg_release_temporary(shader, t_vtex);
- ureg_release_temporary(shader, t_vpos);
-
- ureg_END(shader);
+ ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
+ ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+ ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
- return ureg_create_shader_and_destroy(shader, r->pipe);
+ return tmp;
}
static void *
struct ureg_src mv_scale;
struct ureg_src vrect, vmv[2];
struct ureg_dst t_vpos;
- struct ureg_dst o_vpos, o_line, o_vmv[2];
+ struct ureg_dst o_vpos, o_vmv[2];
unsigned i;
shader = ureg_create(TGSI_PROCESSOR_VERTEX);
return NULL;
vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
- ureg_DECL_vs_input(shader, VS_I_EB);
- ureg_DECL_vs_input(shader, VS_I_FLAGS);
vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
- t_vpos = calc_position(r, shader);
+ t_vpos = calc_position(r, shader, ureg_imm2f(shader,
+ (float)MACROBLOCK_WIDTH / r->buffer_width,
+ (float)MACROBLOCK_HEIGHT / r->buffer_height)
+ );
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
- o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
/*
- * mv_scale = 0.5 / (dst.width, dst.height);
+ * mv_scale.xy = 0.5 / (dst.width, dst.height);
+ * mv_scale.z = 1.0f / 4.0f
+ * mv_scale.w = 1.0f / 255.0f
*
* // Apply motion vectors
- * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
- *
- * o_line.y = vrect
+ * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos
+ * o_vmv[0..1].zw = vmv[0..1] * mv_scale
*
*/
- ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
- vrect, ureg_imm1f(shader, r->macroblock_size / 2));
-
mv_scale = ureg_imm4f(shader,
0.5f / r->buffer_width,
0.5f / r->buffer_height,
return ureg_create_shader_and_destroy(shader, r->pipe);
}
-static struct ureg_dst
-calc_field(struct ureg_program *shader)
-{
- struct ureg_dst tmp;
- struct ureg_src line;
-
- tmp = ureg_DECL_temporary(shader);
-
- line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
-
- /*
- * line.x is flag for intra frames
- * line.y going from 0 to 1 if not interlaced
- * line.y going from 0 to 8 in steps of 0.5 if interlaced
- *
- * tmp.xy = fraction(line)
- * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
- */
- ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line);
- ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
- ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-
- return tmp;
-}
-
-static void *
-create_ycbcr_frag_shader(struct vl_mc *r, float scale)
-{
- struct ureg_program *shader;
- struct ureg_src tc[2], sampler;
- struct ureg_dst texel, t_tc, field;
- struct ureg_dst fragment;
- unsigned label;
-
- shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
- if (!shader)
- return NULL;
-
- tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
- tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
-
- sampler = ureg_DECL_sampler(shader, 0);
-
- t_tc = ureg_DECL_temporary(shader);
- texel = ureg_DECL_temporary(shader);
-
- fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
- field = calc_field(shader);
-
- /*
- * texel.y = tex(field.y ? tc[1] : tc[0], sampler[0])
- * texel.cb = tex(tc[2], sampler[1])
- * texel.cr = tex(tc[2], sampler[2])
- */
-
- ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
- ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
- tc[1], tc[0]);
-
- ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
-
- ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f));
- ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
-
- ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-
- if (scale != 1.0f)
- ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
- ureg_src(texel), ureg_imm1f(shader, scale),
- ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
- else
- ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
- ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
-
- ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
- ureg_ENDIF(shader);
-
- ureg_release_temporary(shader, t_tc);
- ureg_release_temporary(shader, texel);
-
- return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
static void *
create_ref_frag_shader(struct vl_mc *r)
{
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
- field = calc_field(shader);
+ field = calc_line(shader);
/*
* ref = field.z ? tc[1] : tc[0]
return ureg_create_shader_and_destroy(shader, r->pipe);
}
+static void *
+create_ycbcr_vert_shader(struct vl_mc *r)
+{
+ struct ureg_program *shader;
+
+ struct ureg_src vrect, vpos;
+ struct ureg_dst t_vpos, t_vtex;
+ struct ureg_dst o_vpos, o_vtex;
+
+ struct vertex2f scale = {
+ (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size,
+ (float)BLOCK_HEIGHT / r->buffer_height * MACROBLOCK_HEIGHT / r->macroblock_size
+ };
+
+ unsigned label;
+
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return NULL;
+
+ vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+ vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+ t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y));
+ t_vtex = ureg_DECL_temporary(shader);
+
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+ o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+
+ /*
+ * o_vtex.xy = t_vpos
+ * o_vtex.z = intra * 0.5
+ *
+ * if(interlaced) {
+ *   t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y, 0 }
+ * t_vtex.z = vpos.y % 2
+ * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
+ * o_vpos.y = t_vtex.y + t_vpos.y
+ *
+ * o_vtex.w = t_vtex.z ? 0 : 1
+ * }
+ *
+ */
+ ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+ ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z),
+ ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
+ ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
+
+ if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
+ ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);
+
+ ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY),
+ ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)),
+ ureg_imm2f(shader, 0.0f, scale.y),
+ ureg_imm2f(shader, -scale.y, 0.0f));
+ ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z),
+ ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f));
+
+ ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex));
+
+ ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y),
+ ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+ ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X),
+ ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y));
+ ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
+ ureg_src(t_vpos), ureg_src(t_vtex));
+
+ ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W),
+ ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+ ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));
+
+ ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+ ureg_ENDIF(shader);
+ }
+
+ ureg_release_temporary(shader, t_vtex);
+ ureg_release_temporary(shader, t_vpos);
+
+ ureg_END(shader);
+
+ return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static void *
+create_ycbcr_frag_shader(struct vl_mc *r, float scale)
+{
+ struct ureg_program *shader;
+ struct ureg_src tc, sampler;
+ struct ureg_dst tmp;
+ struct ureg_dst fragment;
+ unsigned label;
+
+ shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!shader)
+ return NULL;
+
+ tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+
+ sampler = ureg_DECL_sampler(shader, 0);
+
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+ tmp = calc_line(shader);
+
+ /*
+ * if (field == tc.w)
+ * kill();
+ * else {
+ * fragment.xyz = tex(tc, sampler) * scale + tc.z
+ * fragment.w = 1.0f
+ * }
+ */
+
+ ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+ ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp));
+
+ ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+
+ ureg_KILP(shader);
+
+ ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+ ureg_ELSE(shader, &label);
+
+ ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler);
+
+ if (scale != 1.0f)
+ ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+ ureg_src(tmp), ureg_imm1f(shader, scale),
+ ureg_scalar(tc, TGSI_SWIZZLE_Z));
+ else
+ ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+ ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z));
+
+ ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+ ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+ ureg_ENDIF(shader);
+
+ ureg_release_temporary(shader, tmp);
+
+ return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
static bool
init_pipe_state(struct vl_mc *r)
{
}
static void
-upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsigned y, short *block)
+upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
+ unsigned x, unsigned y, short *block,
+ bool intra, enum pipe_mpeg12_dct_type type)
{
unsigned tex_pitch;
short *texels;
assert(buffer);
assert(block);
+ vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
+
tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x, ++tb) {
if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
- upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+ upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks,
+ mb->dct_intra, mb->dct_type);
blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
}
}
for (tb = 1; tb < 3; ++tb) {
if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
- upload_block(buffer, tb, mb->mbx, mb->mby, blocks);
+ upload_block(buffer, tb, mb->mbx, mb->mby, blocks,
+ mb->dct_intra, mb->dct_type);
blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
}
}
assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
for ( i = 0; i < num_macroblocks; ++i ) {
- vl_vb_add_block(&buf->vertex_stream, &mb[i], dec->empty_block_mask);
+ vl_vb_add_block(&buf->vertex_stream, &mb[i]);
upload_buffer(dec, buf, &mb[i]);
}
}
static bool
init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
{
- unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
+ unsigned chroma_width, chroma_height;
struct pipe_sampler_view *matrix, *transpose;
float matrix_scale, transpose_scale;
pipe_sampler_view_reference(&transpose, matrix);
if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
- 2, 2, dec->nr_of_idct_render_targets, matrix, transpose))
+ dec->nr_of_idct_render_targets, matrix, transpose))
goto error_y;
if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
chroma_width = buffer_width / 2;
chroma_height = buffer_height / 2;
- chroma_blocks_x = 1;
- chroma_blocks_y = 1;
} else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
chroma_width = buffer_width;
chroma_height = buffer_height / 2;
- chroma_blocks_x = 2;
- chroma_blocks_y = 1;
} else {
chroma_width = buffer_width;
chroma_height = buffer_height;
- chroma_blocks_x = 2;
- chroma_blocks_y = 2;
}
if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
- chroma_blocks_x, chroma_blocks_y,
dec->nr_of_idct_render_targets, matrix, transpose))
goto error_c;
dec->pipe = pipe;
- dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
+ dec->quads = vl_vb_upload_quads(dec->pipe);
dec->pos = vl_vb_upload_pos(
dec->pipe,
dec->base.width / MACROBLOCK_WIDTH,
struct vl_ycbcr_vertex_stream
{
- struct vertex2s pos;
- uint8_t mb_type_intra;
- uint8_t dct_type_field;
- uint8_t dummy[2];
- uint8_t eb[2][2];
+ uint8_t x;
+ uint8_t y;
+ uint8_t intra;
+ uint8_t field;
};
struct vl_mv_vertex_stream
};
struct pipe_vertex_buffer
-vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y)
+vl_vb_upload_quads(struct pipe_context *pipe)
{
struct pipe_vertex_buffer quad;
struct pipe_transfer *buf_transfer;
- struct vertex4f *v;
+ struct vertex2f *v;
- unsigned x, y, i;
+ unsigned i;
assert(pipe);
/* create buffer */
- quad.stride = sizeof(struct vertex4f);
+ quad.stride = sizeof(struct vertex2f);
quad.buffer_offset = 0;
quad.buffer = pipe_buffer_create
(
pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
PIPE_USAGE_STATIC,
- sizeof(struct vertex4f) * 4 * blocks_x * blocks_y
+ sizeof(struct vertex2f) * 4
);
if(!quad.buffer)
&buf_transfer
);
- for ( y = 0; y < blocks_y; ++y) {
- for ( x = 0; x < blocks_x; ++x) {
- for (i = 0; i < 4; ++i, ++v) {
- v->x = block_quad[i].x;
- v->y = block_quad[i].y;
-
- v->z = x;
- v->w = y;
- }
- }
+ for (i = 0; i < 4; ++i, ++v) {
+ v->x = block_quad[i].x;
+ v->y = block_quad[i].y;
}
pipe_buffer_unmap(pipe, buf_transfer);
element.src_offset = 0;
element.instance_divisor = 0;
element.vertex_buffer_index = 0;
- element.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ element.src_format = PIPE_FORMAT_R32G32_FLOAT;
return element;
}
static void
vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
- unsigned vertex_buffer_index)
+ unsigned vertex_buffer_index)
{
unsigned i, offset = 0;
vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
/* Position element */
- vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
- /* flags */
- vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
-
- /* empty block element of selected component */
- vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
-
- vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
+ vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
- return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
+ return pipe->create_vertex_elements_state(pipe, 2, vertex_elems);
}
void *
}
void
-vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned width, unsigned height)
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
+ unsigned width, unsigned height)
{
unsigned i, size;
pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
PIPE_USAGE_STREAM,
- sizeof(struct vl_ycbcr_vertex_stream) * size
+ sizeof(struct vl_ycbcr_vertex_stream) * size * 4
);
}
}
+void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+ unsigned component, unsigned x, unsigned y,
+ bool intra, enum pipe_mpeg12_dct_type type)
+{
+ struct vl_ycbcr_vertex_stream *stream;
+
+ assert(buffer);
+ assert(buffer->ycbcr[component].num_instances < buffer->width * buffer->height * 4);
+
+ stream = buffer->ycbcr[component].vertex_stream++;
+ stream->x = x;
+ stream->y = y;
+ stream->intra = intra;
+ stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD;
+
+ buffer->ycbcr[component].num_instances++;
+}
+
static void
get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2])
{
dst[1].w = src->bottom.wheight;
}
-static bool
-get_ycbcr_vectors(struct vl_ycbcr_vertex_stream *stream,
- struct pipe_mpeg12_macroblock *mb, const unsigned (*empty_block_mask)[2][2])
-{
- bool completely_empty = true;
- unsigned i, j;
-
- stream->pos.x = mb->mbx;
- stream->pos.y = mb->mby;
- stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
- stream->mb_type_intra = mb->dct_intra;
-
- for ( i = 0; i < 2; ++i)
- for ( j = 0; j < 2; ++j) {
- bool empty = !(mb->cbp & (*empty_block_mask)[i][j]);
- stream->eb[i][j] = empty;
- completely_empty &= empty;
- }
-
- return !completely_empty;
-}
-
void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
- const unsigned (*empty_block_mask)[3][2][2])
+vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb)
{
- unsigned i, mv_pos;
+ unsigned mv_pos;
assert(buffer);
assert(mb);
- if(mb->cbp) {
- for (i = 0; i < VL_MAX_PLANES; ++i) {
- assert(buffer->ycbcr[i].num_instances < buffer->width * buffer->height);
- if (get_ycbcr_vectors(buffer->ycbcr[i].vertex_stream, mb, &(*empty_block_mask)[i])) {
- buffer->ycbcr[i].vertex_stream++;
- buffer->ycbcr[i].num_instances++;
- }
- }
- }
-
mv_pos = mb->mbx + mb->mby * buffer->width;
get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv);
get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv);
assert(buffer);
- for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+ for (i = 0; i < VL_MAX_PLANES; ++i) {
pipe_resource_reference(&buffer->ycbcr[i].resource, NULL);
}
VS_I_RECT,
VS_I_VPOS,
- VS_I_FLAGS,
- VS_I_EB,
-
- VS_I_MV_TOP = VS_I_FLAGS,
- VS_I_MV_BOTTOM = VS_I_EB,
+ VS_I_MV_TOP,
+ VS_I_MV_BOTTOM,
NUM_VS_INPUTS
};
} mv[VL_MAX_REF_FRAMES];
};
-struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
- unsigned blocks_x, unsigned blocks_y);
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe);
struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
-void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
- const unsigned (*empty_block_mask)[3][2][2]);
+void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+ unsigned component, unsigned x, unsigned y,
+ bool intra, enum pipe_mpeg12_dct_type type);
+
+void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb);
void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);