1 /**************************************************************************
3 * Copyright 2009 Younes Manton.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <util/u_draw.h>
39 #include <tgsi/tgsi_ureg.h>
41 #define MACROBLOCK_WIDTH 16
42 #define MACROBLOCK_HEIGHT 16
44 #define BLOCK_HEIGHT 8
/*
 * Coded-block-pattern bit masks for 4:2:0 macroblocks, indexed
 * [component][y][x].  Component 0 carries one distinct bit per 2x2 block
 * position; components 1 and 2 repeat a single bit in every position.
 * The table is tested against a macroblock's cbp value by grab_blocks().
 */
static const unsigned const_empty_block_mask_420[3][2][2] = {
   { { 0x20, 0x10 }, { 0x08, 0x04 } },
   { { 0x02, 0x02 }, { 0x02, 0x02 } },
   { { 0x01, 0x01 }, { 0x01, 0x01 } }
};
/*
 * create_vert_shader: builds the motion-compensation vertex shader with the
 * ureg helpers and returns the result of ureg_create_shader_and_destroy().
 *
 * Visible steps: declares three temporaries, the rect / position /
 * empty-block inputs plus four motion-vector inputs, and the position, line,
 * texcoord, empty-block, info and motion-vector outputs; scales
 * (vpos + vrect) by MACROBLOCK_{WIDTH,HEIGHT} / buffer_{width,height} into
 * t_vpos; derives the o_eb, o_info, o_vmv, o_vtex and o_line outputs; and
 * rewrites o_vtex[0..1] and o_line.x inside a branch taken on eb[0][0].w.
 * The three temporaries are released before the shader is created.
 *
 * NOTE(review): this listing is an extraction-damaged copy.  Every statement
 * is prefixed with its original line number, tokens are split one per line,
 * and the gaps in that numbering show that lines are missing: the return
 * type, the braces, the declarations of `i` and `label`, the final operand
 * line of four ureg_CMP calls, and the delimiters of the pseudo-code
 * comment.  Restore the block from the upstream file before building.
 */
69 create_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
71 struct ureg_program
*shader
;
72 struct ureg_src block_scale
, mv_scale
;
73 struct ureg_src vrect
, vpos
, eb
[2][2], vmv
[4];
74 struct ureg_dst t_vpos
, t_vtex
, t_vmv
;
75 struct ureg_dst o_vpos
, o_line
, o_vtex
[3], o_eb
[2], o_vmv
[4], o_info
;
78 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
82 t_vpos
= ureg_DECL_temporary(shader
);
83 t_vtex
= ureg_DECL_temporary(shader
);
84 t_vmv
= ureg_DECL_temporary(shader
);
86 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
87 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
88 eb
[0][0] = ureg_DECL_vs_input(shader
, VS_I_EB_0_0
);
89 eb
[1][0] = ureg_DECL_vs_input(shader
, VS_I_EB_1_0
);
90 eb
[0][1] = ureg_DECL_vs_input(shader
, VS_I_EB_0_1
);
91 eb
[1][1] = ureg_DECL_vs_input(shader
, VS_I_EB_1_1
);
93 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
94 o_line
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_LINE
);
95 o_vtex
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX0
);
96 o_vtex
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX1
);
97 o_vtex
[2] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX2
);
98 o_eb
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_0
);
99 o_eb
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_1
);
100 o_info
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_INFO
);
102 for (i
= 0; i
< 4; ++i
) {
103 vmv
[i
] = ureg_DECL_vs_input(shader
, VS_I_MV0
+ i
);
104 o_vmv
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_MV0
+ i
);
108 * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
109 * mv_scale = 0.5 / (dst.width, dst.height);
111 * t_vpos = (vpos + vrect) * block_scale
115 * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
117 * o_frame_pred = frame_pred
118 * o_info.x = ref_frames
119 * o_info.y = ref_frames > 0
120 * o_info.z = bkwd_pred
122 * // Apply motion vectors
123 * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale
125 * o_line.xy = vrect * 8
126 * o_line.z = interlaced
128 * if(eb[0][0].w) { //interlaced
130 * t_vtex.y = vrect.y * 0.5
133 * o_vtex[0].xy = t_vtex * block_scale
136 * o_vtex[1].xy = t_vtex * block_scale
138 * o_vtex[0..1].xy = t_vpos
140 * o_vtex[2].xy = t_vpos
143 block_scale
= ureg_imm2f(shader
,
144 (float)MACROBLOCK_WIDTH
/ r
->buffer_width
,
145 (float)MACROBLOCK_HEIGHT
/ r
->buffer_height
);
147 mv_scale
= ureg_imm2f(shader
,
148 0.5f
/ r
->buffer_width
,
149 0.5f
/ r
->buffer_height
);
151 ureg_ADD(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
152 ureg_MUL(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
), block_scale
);
153 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
154 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), vpos
);
156 ureg_CMP(shader
, ureg_writemask(o_eb
[0], TGSI_WRITEMASK_XYZ
),
157 ureg_negate(ureg_scalar(vrect
, TGSI_SWIZZLE_X
)),
159 ureg_CMP(shader
, ureg_writemask(o_eb
[1], TGSI_WRITEMASK_XYZ
),
160 ureg_negate(ureg_scalar(vrect
, TGSI_SWIZZLE_X
)),
163 ureg_MOV(shader
, ureg_writemask(o_info
, TGSI_WRITEMASK_X
),
164 ureg_scalar(eb
[1][1], TGSI_SWIZZLE_W
));
165 ureg_SGE(shader
, ureg_writemask(o_info
, TGSI_WRITEMASK_Y
),
166 ureg_scalar(eb
[1][1], TGSI_SWIZZLE_W
), ureg_imm1f(shader
, 0.0f
));
167 ureg_MOV(shader
, ureg_writemask(o_info
, TGSI_WRITEMASK_Z
),
168 ureg_scalar(eb
[1][0], TGSI_SWIZZLE_W
));
170 ureg_MAD(shader
, ureg_writemask(o_vmv
[0], TGSI_WRITEMASK_XY
), mv_scale
, vmv
[0], ureg_src(t_vpos
));
171 ureg_MAD(shader
, ureg_writemask(o_vmv
[2], TGSI_WRITEMASK_XY
), mv_scale
, vmv
[2], ureg_src(t_vpos
));
173 ureg_CMP(shader
, ureg_writemask(t_vmv
, TGSI_WRITEMASK_XY
),
174 ureg_negate(ureg_scalar(eb
[0][1], TGSI_SWIZZLE_W
)),
176 ureg_MAD(shader
, ureg_writemask(o_vmv
[1], TGSI_WRITEMASK_XY
), mv_scale
, ureg_src(t_vmv
), ureg_src(t_vpos
));
178 ureg_CMP(shader
, ureg_writemask(t_vmv
, TGSI_WRITEMASK_XY
),
179 ureg_negate(ureg_scalar(eb
[0][1], TGSI_SWIZZLE_W
)),
181 ureg_MAD(shader
, ureg_writemask(o_vmv
[3], TGSI_WRITEMASK_XY
), mv_scale
, ureg_src(t_vmv
), ureg_src(t_vpos
));
183 ureg_MOV(shader
, ureg_writemask(o_vtex
[0], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
184 ureg_MOV(shader
, ureg_writemask(o_vtex
[1], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
185 ureg_MOV(shader
, ureg_writemask(o_vtex
[2], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
187 ureg_MOV(shader
, ureg_writemask(o_line
, TGSI_WRITEMASK_X
), ureg_scalar(vrect
, TGSI_SWIZZLE_Y
));
188 ureg_MUL(shader
, ureg_writemask(o_line
, TGSI_WRITEMASK_Y
),
189 vrect
, ureg_imm1f(shader
, MACROBLOCK_HEIGHT
/ 2));
191 ureg_IF(shader
, ureg_scalar(eb
[0][0], TGSI_SWIZZLE_W
), &label
);
193 ureg_MOV(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_X
), vrect
);
194 ureg_MUL(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_Y
), vrect
, ureg_imm1f(shader
, 0.5f
));
195 ureg_ADD(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_XY
), vpos
, ureg_src(t_vtex
));
196 ureg_MUL(shader
, ureg_writemask(o_vtex
[0], TGSI_WRITEMASK_XY
), ureg_src(t_vtex
), block_scale
);
197 ureg_ADD(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_Y
), ureg_src(t_vtex
), ureg_imm1f(shader
, 0.5f
));
198 ureg_MUL(shader
, ureg_writemask(o_vtex
[1], TGSI_WRITEMASK_XY
), ureg_src(t_vtex
), block_scale
);
200 ureg_MUL(shader
, ureg_writemask(o_line
, TGSI_WRITEMASK_X
),
201 ureg_scalar(vrect
, TGSI_SWIZZLE_Y
),
202 ureg_imm1f(shader
, MACROBLOCK_HEIGHT
/ 2));
204 ureg_fixup_label(shader
, label
, ureg_get_instruction_number(shader
));
207 ureg_release_temporary(shader
, t_vtex
);
208 ureg_release_temporary(shader
, t_vpos
);
209 ureg_release_temporary(shader
, t_vmv
);
213 return ureg_create_shader_and_destroy(shader
, r
->pipe
);
216 static struct ureg_dst
217 calc_field(struct ureg_program
*shader
)
220 struct ureg_src line
;
222 tmp
= ureg_DECL_temporary(shader
);
224 line
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_LINE
, TGSI_INTERPOLATE_LINEAR
);
227 * line.x going from 0 to 1 if not interlaced
228 * line.x going from 0 to 8 in steps of 0.5 if interlaced
229 * line.y going from 0 to 8 in steps of 0.5
231 * tmp.xy = fraction(line)
232 * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
234 ureg_FRC(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), line
);
235 ureg_SGE(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), ureg_imm1f(shader
, 0.5f
));
/*
 * fetch_ycbcr: emits the fragment-shader code that samples the three
 * Y/Cb/Cr source textures into a single temporary.
 *
 * Visible steps: declares the temporaries texel, t_tc and t_eb_info, the
 * three linearly interpolated texcoord inputs, the two constant-interpolated
 * empty-block inputs and samplers 0..2; fills t_tc.xy and t_eb_info.xyz with
 * ureg_CMP selections keyed on field.x; converts t_eb_info to a flag with
 * SLT against 0.5; clears texel.xyz to 0; then, for each channel i, opens an
 * IF on the i-th component of t_eb_info and issues a 3D TEX into channel i
 * of texel.  One TEX reads t_tc (guarded by "i == 0 or chroma format is
 * 4:4:4"), the other reads tc[2].  t_tc and t_eb_info are released at the
 * end.
 *
 * NOTE(review): this listing is an extraction-damaged copy.  Statements are
 * prefixed with their original line numbers, tokens are split one per line,
 * and lines are missing: the braces, the declarations of `i` and `label`,
 * the last operand line of both ureg_CMP calls, the line joining the two TEX
 * calls, the closing of each IF, and the return statement.  Restore the
 * block from the upstream file before building.
 */
240 static struct ureg_dst
241 fetch_ycbcr(struct vl_mpeg12_mc_renderer
*r
, struct ureg_program
*shader
, struct ureg_dst field
)
243 struct ureg_src tc
[3], sampler
[3], eb
[2];
244 struct ureg_dst texel
, t_tc
, t_eb_info
;
247 texel
= ureg_DECL_temporary(shader
);
248 t_tc
= ureg_DECL_temporary(shader
);
249 t_eb_info
= ureg_DECL_temporary(shader
);
251 tc
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX0
, TGSI_INTERPOLATE_LINEAR
);
252 tc
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX1
, TGSI_INTERPOLATE_LINEAR
);
253 tc
[2] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX2
, TGSI_INTERPOLATE_LINEAR
);
255 eb
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_0
, TGSI_INTERPOLATE_CONSTANT
);
256 eb
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_1
, TGSI_INTERPOLATE_CONSTANT
);
258 for (i
= 0; i
< 3; ++i
) {
259 sampler
[i
] = ureg_DECL_sampler(shader
, i
);
263 * texel.y = tex(field.y ? tc[1] : tc[0], sampler[0])
264 * texel.cb = tex(tc[2], sampler[1])
265 * texel.cr = tex(tc[2], sampler[2])
268 ureg_CMP(shader
, ureg_writemask(t_tc
, TGSI_WRITEMASK_XY
),
269 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_X
)),
272 ureg_CMP(shader
, ureg_writemask(t_eb_info
, TGSI_WRITEMASK_XYZ
),
273 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_X
)),
276 /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
277 ureg_SLT(shader
, ureg_writemask(t_eb_info
, TGSI_WRITEMASK_XYZ
), ureg_src(t_eb_info
), ureg_imm1f(shader
, 0.5f
));
279 ureg_MOV(shader
, ureg_writemask(texel
, TGSI_WRITEMASK_XYZ
), ureg_imm1f(shader
, 0.0f
));
280 for (i
= 0; i
< 3; ++i
) {
281 ureg_IF(shader
, ureg_scalar(ureg_src(t_eb_info
), TGSI_SWIZZLE_X
+ i
), &label
);
283 /* Nouveau can't writemask tex dst regs (yet?), so this won't work anymore on nvidia hardware */
284 if(i
==0 || r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
) {
285 ureg_TEX(shader
, ureg_writemask(texel
, TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_3D
, ureg_src(t_tc
), sampler
[i
]);
287 ureg_TEX(shader
, ureg_writemask(texel
, TGSI_WRITEMASK_X
<< i
), TGSI_TEXTURE_3D
, tc
[2], sampler
[i
]);
290 ureg_fixup_label(shader
, label
, ureg_get_instruction_number(shader
));
294 ureg_release_temporary(shader
, t_tc
);
295 ureg_release_temporary(shader
, t_eb_info
);
/*
 * fetch_ref: emits the fragment-shader code that samples the reference
 * frame(s) into a temporary.
 *
 * Visible steps: declares the constant-interpolated VS_O_INFO input, the
 * four motion-vector texcoord inputs (VS_O_MV0 + 0..3), samplers 3 and 4,
 * two `ref` temporaries and a `result` temporary; presets result.xyz to 0.5;
 * then, inside an IF on info.y, selects a coordinate into ref[0].xy with a
 * ureg_CMP keyed on field.y.  An inner IF on info.x contains a 2D TEX of
 * ref[0] into result, through sampler[1] when info.z is set and sampler[0]
 * otherwise.  Its ELSE part selects ref[1].xy the same way, samples ref[0]
 * and ref[1] through sampler[0] and sampler[1], and stores their 50/50 LRP
 * blend in result.xyz.  Both `ref` temporaries are released at the end.
 *
 * NOTE(review): this listing is an extraction-damaged copy.  Statements are
 * prefixed with their original line numbers, tokens are split one per line,
 * and lines are missing: the braces, the last operand line of both ureg_CMP
 * calls, the closing of every IF/ELSE, the pseudo-code comment delimiters
 * and the return statement.  The surviving pseudo-code mentions field.z
 * while the code swizzles field.y; verify which is intended when restoring
 * the block from the upstream file.
 */
300 static struct ureg_dst
301 fetch_ref(struct ureg_program
*shader
, struct ureg_dst field
)
303 struct ureg_src info
;
304 struct ureg_src tc
[4], sampler
[2];
305 struct ureg_dst ref
[2], result
;
306 unsigned i
, intra_label
, bi_label
, label
;
308 info
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_INFO
, TGSI_INTERPOLATE_CONSTANT
);
310 for (i
= 0; i
< 4; ++i
)
311 tc
[i
] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_MV0
+ i
, TGSI_INTERPOLATE_LINEAR
);
313 for (i
= 0; i
< 2; ++i
) {
314 sampler
[i
] = ureg_DECL_sampler(shader
, i
+ 3);
315 ref
[i
] = ureg_DECL_temporary(shader
);
318 result
= ureg_DECL_temporary(shader
);
320 ureg_MOV(shader
, ureg_writemask(result
, TGSI_WRITEMASK_XYZ
), ureg_imm1f(shader
, 0.5f
));
322 ureg_IF(shader
, ureg_scalar(info
, TGSI_SWIZZLE_Y
), &intra_label
);
323 ureg_CMP(shader
, ureg_writemask(ref
[0], TGSI_WRITEMASK_XY
),
324 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_Y
)),
327 ureg_IF(shader
, ureg_scalar(info
, TGSI_SWIZZLE_X
), &bi_label
);
330 * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
332 ureg_IF(shader
, ureg_scalar(info
, TGSI_SWIZZLE_Z
), &label
);
333 ureg_TEX(shader
, result
, TGSI_TEXTURE_2D
, ureg_src(ref
[0]), sampler
[1]);
334 ureg_fixup_label(shader
, label
, ureg_get_instruction_number(shader
));
335 ureg_ELSE(shader
, &label
);
336 ureg_TEX(shader
, result
, TGSI_TEXTURE_2D
, ureg_src(ref
[0]), sampler
[0]);
337 ureg_fixup_label(shader
, label
, ureg_get_instruction_number(shader
));
340 ureg_fixup_label(shader
, bi_label
, ureg_get_instruction_number(shader
));
341 ureg_ELSE(shader
, &bi_label
);
345 * ref[0..1] = tex(tc[0..1], sampler[0..1])
347 * ref[0..1] = tex(tc[2..3], sampler[0..1])
349 ureg_CMP(shader
, ureg_writemask(ref
[1], TGSI_WRITEMASK_XY
),
350 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_Y
)),
352 ureg_TEX(shader
, ref
[0], TGSI_TEXTURE_2D
, ureg_src(ref
[0]), sampler
[0]);
353 ureg_TEX(shader
, ref
[1], TGSI_TEXTURE_2D
, ureg_src(ref
[1]), sampler
[1]);
355 ureg_LRP(shader
, ureg_writemask(result
, TGSI_WRITEMASK_XYZ
), ureg_imm1f(shader
, 0.5f
),
356 ureg_src(ref
[0]), ureg_src(ref
[1]));
358 ureg_fixup_label(shader
, bi_label
, ureg_get_instruction_number(shader
));
360 ureg_fixup_label(shader
, intra_label
, ureg_get_instruction_number(shader
));
363 for (i
= 0; i
< 2; ++i
)
364 ureg_release_temporary(shader
, ref
[i
]);
370 create_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
372 struct ureg_program
*shader
;
373 struct ureg_dst result
;
374 struct ureg_dst field
, texel
;
375 struct ureg_dst fragment
;
377 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
381 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
383 field
= calc_field(shader
);
384 texel
= fetch_ycbcr(r
, shader
, field
);
386 result
= fetch_ref(shader
, field
);
388 ureg_ADD(shader
, ureg_writemask(fragment
, TGSI_WRITEMASK_XYZ
), ureg_src(texel
), ureg_src(result
));
390 ureg_release_temporary(shader
, field
);
391 ureg_release_temporary(shader
, texel
);
392 ureg_release_temporary(shader
, result
);
395 return ureg_create_shader_and_destroy(shader
, r
->pipe
);
399 init_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
401 struct pipe_sampler_state sampler
;
402 struct pipe_rasterizer_state rs_state
;
408 r
->viewport
.scale
[0] = r
->buffer_width
;
409 r
->viewport
.scale
[1] = r
->buffer_height
;
410 r
->viewport
.scale
[2] = 1;
411 r
->viewport
.scale
[3] = 1;
412 r
->viewport
.translate
[0] = 0;
413 r
->viewport
.translate
[1] = 0;
414 r
->viewport
.translate
[2] = 0;
415 r
->viewport
.translate
[3] = 0;
417 r
->fb_state
.width
= r
->buffer_width
;
418 r
->fb_state
.height
= r
->buffer_height
;
419 r
->fb_state
.nr_cbufs
= 1;
420 r
->fb_state
.zsbuf
= NULL
;
423 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
425 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
|| true) { //TODO
426 filters
[1] = PIPE_TEX_FILTER_NEAREST
;
427 filters
[2] = PIPE_TEX_FILTER_NEAREST
;
430 filters
[1] = PIPE_TEX_FILTER_LINEAR
;
431 filters
[2] = PIPE_TEX_FILTER_LINEAR
;
433 /* Fwd, bkwd ref filters */
434 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
435 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
437 for (i
= 0; i
< 5; ++i
) {
438 memset(&sampler
, 0, sizeof(sampler
));
439 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
440 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
441 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_BORDER
;
442 sampler
.min_img_filter
= filters
[i
];
443 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
444 sampler
.mag_img_filter
= filters
[i
];
445 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
446 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
447 sampler
.normalized_coords
= 1;
448 /*sampler.shadow_ambient = ; */
449 /*sampler.lod_bias = ; */
451 /*sampler.max_lod = ; */
452 sampler
.border_color
[0] = 0.0f
;
453 sampler
.border_color
[1] = 0.0f
;
454 sampler
.border_color
[2] = 0.0f
;
455 sampler
.border_color
[3] = 0.0f
;
456 /*sampler.max_anisotropy = ; */
457 r
->samplers
.all
[i
] = r
->pipe
->create_sampler_state(r
->pipe
, &sampler
);
460 memset(&rs_state
, 0, sizeof(rs_state
));
461 /*rs_state.sprite_coord_enable */
462 rs_state
.sprite_coord_mode
= PIPE_SPRITE_COORD_UPPER_LEFT
;
463 rs_state
.point_quad_rasterization
= true;
464 rs_state
.point_size
= BLOCK_WIDTH
;
465 rs_state
.gl_rasterization_rules
= true;
466 r
->rs_state
= r
->pipe
->create_rasterizer_state(r
->pipe
, &rs_state
);
472 cleanup_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
478 for (i
= 0; i
< 5; ++i
)
479 r
->pipe
->delete_sampler_state(r
->pipe
, r
->samplers
.all
[i
]);
481 r
->pipe
->delete_rasterizer_state(r
->pipe
, r
->rs_state
);
/*
 * init_buffers: sets up the renderer-wide resources.
 *
 * Visible steps: computes macroblocks_per_batch from the macroblock grid
 * (the buffer size aligned up to MACROBLOCK_WIDTH/HEIGHT; the whole picture
 * when bufmode is VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, otherwise one row);
 * uploads the IDCT matrix; initialises idct_y for the full buffer size with
 * 2x2 blocks; derives chroma_width/chroma_height from the chroma format
 * (halved in both directions for 4:2:0, halved vertically only for the
 * 4:2:2 branch, unchanged otherwise); initialises idct_cr and idct_cb;
 * uploads the quad vertex buffer, fetches the vertex element state and
 * creates the vertex and fragment shaders.
 *
 * NOTE(review): this listing is an extraction-damaged copy.  Statements are
 * prefixed with their original line numbers, tokens are split one per line,
 * and lines are missing: the return type, the braces, the `mbw`/`mbh`
 * declarations that the two align() expressions initialise, every
 * assignment to chroma_blocks_x/chroma_blocks_y, the statements executed
 * when a check fails, and the final return.  Restore the block from the
 * upstream file before building.
 */
485 init_buffers(struct vl_mpeg12_mc_renderer
*r
)
487 struct pipe_resource
*idct_matrix
;
490 align(r
->buffer_width
, MACROBLOCK_WIDTH
) / MACROBLOCK_WIDTH
;
492 align(r
->buffer_height
, MACROBLOCK_HEIGHT
) / MACROBLOCK_HEIGHT
;
494 unsigned chroma_width
, chroma_height
, chroma_blocks_x
, chroma_blocks_y
;
498 r
->macroblocks_per_batch
=
499 mbw
* (r
->bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
? mbh
: 1);
501 if (!(idct_matrix
= vl_idct_upload_matrix(r
->pipe
)))
504 if (!vl_idct_init(&r
->idct_y
, r
->pipe
, r
->buffer_width
, r
->buffer_height
,
505 2, 2, TGSI_SWIZZLE_X
, idct_matrix
))
508 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
) {
509 chroma_width
= r
->buffer_width
/ 2;
510 chroma_height
= r
->buffer_height
/ 2;
513 } else if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
) {
514 chroma_width
= r
->buffer_width
;
515 chroma_height
= r
->buffer_height
/ 2;
519 chroma_width
= r
->buffer_width
;
520 chroma_height
= r
->buffer_height
;
525 if(!vl_idct_init(&r
->idct_cr
, r
->pipe
, chroma_width
, chroma_height
,
526 chroma_blocks_x
, chroma_blocks_y
, TGSI_SWIZZLE_Y
, idct_matrix
))
529 if(!vl_idct_init(&r
->idct_cb
, r
->pipe
, chroma_width
, chroma_height
,
530 chroma_blocks_x
, chroma_blocks_y
, TGSI_SWIZZLE_Z
, idct_matrix
))
533 r
->quad
= vl_vb_upload_quads(r
->pipe
, 1, 1);
534 r
->vertex_elems_state
= vl_vb_get_elems_state(r
->pipe
, true);
536 if (r
->vertex_elems_state
== NULL
)
539 r
->vs
= create_vert_shader(r
);
540 r
->fs
= create_frag_shader(r
);
542 if (r
->vs
== NULL
|| r
->fs
== NULL
)
549 cleanup_buffers(struct vl_mpeg12_mc_renderer
*r
)
553 r
->pipe
->delete_vs_state(r
->pipe
, r
->vs
);
554 r
->pipe
->delete_fs_state(r
->pipe
, r
->fs
);
556 vl_idct_cleanup(&r
->idct_y
);
557 vl_idct_cleanup(&r
->idct_cr
);
558 vl_idct_cleanup(&r
->idct_cb
);
560 r
->pipe
->delete_vertex_elements_state(r
->pipe
, r
->vertex_elems_state
);
563 static struct pipe_sampler_view
564 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer
*r
, struct pipe_surface
*surface
)
566 struct pipe_sampler_view
*sampler_view
;
570 sampler_view
= (struct pipe_sampler_view
*)util_keymap_lookup(r
->texview_map
, &surface
);
572 struct pipe_sampler_view templat
;
573 boolean added_to_map
;
575 u_sampler_view_default_template(&templat
, surface
->texture
,
576 surface
->texture
->format
);
577 sampler_view
= r
->pipe
->create_sampler_view(r
->pipe
, surface
->texture
,
582 added_to_map
= util_keymap_insert(r
->texview_map
, &surface
,
583 sampler_view
, r
->pipe
);
584 assert(added_to_map
);
591 grab_blocks(struct vl_mpeg12_mc_renderer
*r
,
592 struct vl_mpeg12_mc_buffer
*buffer
,
593 unsigned mbx
, unsigned mby
,
594 unsigned cbp
, short *blocks
)
602 for (y
= 0; y
< 2; ++y
) {
603 for (x
= 0; x
< 2; ++x
, ++tb
) {
604 if (cbp
& (*r
->empty_block_mask
)[0][y
][x
]) {
605 vl_idct_add_block(&buffer
->idct_y
, mbx
* 2 + x
, mby
* 2 + y
, blocks
);
606 blocks
+= BLOCK_WIDTH
* BLOCK_HEIGHT
;
611 /* TODO: Implement 422, 444 */
612 assert(r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
614 for (tb
= 1; tb
< 3; ++tb
) {
615 if (cbp
& (*r
->empty_block_mask
)[tb
][0][0]) {
617 vl_idct_add_block(&buffer
->idct_cb
, mbx
, mby
, blocks
);
619 vl_idct_add_block(&buffer
->idct_cr
, mbx
, mby
, blocks
);
620 blocks
+= BLOCK_WIDTH
* BLOCK_HEIGHT
;
626 grab_macroblock(struct vl_mpeg12_mc_renderer
*r
,
627 struct vl_mpeg12_mc_buffer
*buffer
,
628 struct pipe_mpeg12_macroblock
*mb
)
634 vl_vb_add_block(&buffer
->vertex_stream
, mb
, r
->empty_block_mask
);
635 grab_blocks(r
, buffer
, mb
->mbx
, mb
->mby
, mb
->cbp
, mb
->blocks
);
/*
 * Keymap delete callback: drops the reference the sampler-view cache holds
 * on a cached view.
 *
 * `data` is the pipe_sampler_view stored by find_or_create_sampler_view();
 * `user` is the value passed to util_keymap_insert()/util_delete_keymap()
 * (the pipe context at the visible call sites).
 *
 * NOTE(review): the return type, the fourth parameter, the braces and the
 * entry assertions were lost from the listing; the signature is restored to
 * match the keymap delete-callback type and should be confirmed against
 * util/u_keymap.h.
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   pipe_sampler_view_reference(&sv, NULL);
}
654 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer
*renderer
,
655 struct pipe_context
*pipe
,
656 unsigned buffer_width
,
657 unsigned buffer_height
,
658 enum pipe_video_chroma_format chroma_format
,
659 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode
)
664 /* TODO: Implement other policies */
665 assert(bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
);
667 memset(renderer
, 0, sizeof(struct vl_mpeg12_mc_renderer
));
669 renderer
->pipe
= pipe
;
670 renderer
->buffer_width
= buffer_width
;
671 renderer
->buffer_height
= buffer_height
;
672 renderer
->chroma_format
= chroma_format
;
673 renderer
->bufmode
= bufmode
;
675 /* TODO: Implement 422, 444 */
676 assert(chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
677 renderer
->empty_block_mask
= &const_empty_block_mask_420
;
679 renderer
->texview_map
= util_new_keymap(sizeof(struct pipe_surface
*), -1,
681 if (!renderer
->texview_map
)
684 if (!init_pipe_state(renderer
))
685 goto error_pipe_state
;
687 if (!init_buffers(renderer
))
693 cleanup_pipe_state(renderer
);
696 util_delete_keymap(renderer
->texview_map
, renderer
->pipe
);
701 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer
*renderer
)
705 util_delete_keymap(renderer
->texview_map
, renderer
->pipe
);
706 cleanup_pipe_state(renderer
);
707 cleanup_buffers(renderer
);
711 vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer
*renderer
, struct vl_mpeg12_mc_buffer
*buffer
)
713 struct pipe_resource
template;
714 struct pipe_sampler_view sampler_view
;
718 assert(renderer
&& buffer
);
720 buffer
->surface
= NULL
;
722 buffer
->future
= NULL
;
724 buffer
->vertex_bufs
.individual
.quad
.stride
= renderer
->quad
.stride
;
725 buffer
->vertex_bufs
.individual
.quad
.buffer_offset
= renderer
->quad
.buffer_offset
;
726 pipe_resource_reference(&buffer
->vertex_bufs
.individual
.quad
.buffer
, renderer
->quad
.buffer
);
728 buffer
->vertex_bufs
.individual
.stream
= vl_vb_init(&buffer
->vertex_stream
, renderer
->pipe
,
729 renderer
->macroblocks_per_batch
);
731 memset(&template, 0, sizeof(struct pipe_resource
));
732 template.target
= PIPE_TEXTURE_2D
;
733 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
734 template.format
= PIPE_FORMAT_R16_SNORM
;
735 template.last_level
= 0;
736 template.width0
= renderer
->buffer_width
;
737 template.height0
= renderer
->buffer_height
;
739 template.array_size
= 1;
740 template.usage
= PIPE_USAGE_STATIC
;
741 template.bind
= PIPE_BIND_SAMPLER_VIEW
;
744 buffer
->textures
.individual
.y
= renderer
->pipe
->screen
->resource_create(renderer
->pipe
->screen
, &template);
746 if (!vl_idct_init_buffer(&renderer
->idct_y
, &buffer
->idct_y
,
747 buffer
->textures
.individual
.y
,
748 buffer
->vertex_bufs
.individual
.stream
))
751 if (renderer
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
) {
752 template.width0
= renderer
->buffer_width
/ 2;
753 template.height0
= renderer
->buffer_height
/ 2;
755 else if (renderer
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
)
756 template.height0
= renderer
->buffer_height
/ 2;
758 buffer
->textures
.individual
.cb
=
759 renderer
->pipe
->screen
->resource_create(renderer
->pipe
->screen
, &template);
760 buffer
->textures
.individual
.cr
=
761 renderer
->pipe
->screen
->resource_create(renderer
->pipe
->screen
, &template);
763 if (!vl_idct_init_buffer(&renderer
->idct_cb
, &buffer
->idct_cb
,
764 buffer
->textures
.individual
.cb
,
765 buffer
->vertex_bufs
.individual
.stream
))
768 if (!vl_idct_init_buffer(&renderer
->idct_cr
, &buffer
->idct_cr
,
769 buffer
->textures
.individual
.cr
,
770 buffer
->vertex_bufs
.individual
.stream
))
773 for (i
= 0; i
< 3; ++i
) {
774 u_sampler_view_default_template(&sampler_view
,
775 buffer
->textures
.all
[i
],
776 buffer
->textures
.all
[i
]->format
);
777 sampler_view
.swizzle_r
= i
== 0 ? PIPE_SWIZZLE_RED
: PIPE_SWIZZLE_ZERO
;
778 sampler_view
.swizzle_g
= i
== 1 ? PIPE_SWIZZLE_RED
: PIPE_SWIZZLE_ZERO
;
779 sampler_view
.swizzle_b
= i
== 2 ? PIPE_SWIZZLE_RED
: PIPE_SWIZZLE_ZERO
;
780 sampler_view
.swizzle_a
= PIPE_SWIZZLE_ONE
;
781 buffer
->sampler_views
.all
[i
] = renderer
->pipe
->create_sampler_view(
782 renderer
->pipe
, buffer
->textures
.all
[i
], &sampler_view
);
789 vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer
*renderer
, struct vl_mpeg12_mc_buffer
*buffer
)
793 assert(renderer
&& buffer
);
795 for (i
= 0; i
< 3; ++i
) {
796 pipe_sampler_view_reference(&buffer
->sampler_views
.all
[i
], NULL
);
797 pipe_resource_reference(&buffer
->vertex_bufs
.all
[i
].buffer
, NULL
);
798 pipe_resource_reference(&buffer
->textures
.all
[i
], NULL
);
801 pipe_resource_reference(&buffer
->vertex_bufs
.individual
.quad
.buffer
, NULL
);
802 vl_vb_cleanup(&buffer
->vertex_stream
);
804 vl_idct_cleanup_buffer(&renderer
->idct_y
, &buffer
->idct_y
);
805 vl_idct_cleanup_buffer(&renderer
->idct_cb
, &buffer
->idct_cb
);
806 vl_idct_cleanup_buffer(&renderer
->idct_cr
, &buffer
->idct_cr
);
808 pipe_surface_reference(&buffer
->surface
, NULL
);
809 pipe_surface_reference(&buffer
->past
, NULL
);
810 pipe_surface_reference(&buffer
->future
, NULL
);
814 vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer
*renderer
, struct vl_mpeg12_mc_buffer
*buffer
)
816 assert(renderer
&& buffer
);
818 vl_idct_map_buffers(&renderer
->idct_y
, &buffer
->idct_y
);
819 vl_idct_map_buffers(&renderer
->idct_cr
, &buffer
->idct_cr
);
820 vl_idct_map_buffers(&renderer
->idct_cb
, &buffer
->idct_cb
);
822 vl_vb_map(&buffer
->vertex_stream
, renderer
->pipe
);
826 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
*renderer
,
827 struct vl_mpeg12_mc_buffer
*buffer
,
828 struct pipe_surface
*surface
,
829 struct pipe_surface
*past
,
830 struct pipe_surface
*future
,
831 unsigned num_macroblocks
,
832 struct pipe_mpeg12_macroblock
*mpeg12_macroblocks
,
833 struct pipe_fence_handle
**fence
)
837 assert(renderer
&& buffer
);
839 assert(num_macroblocks
);
840 assert(mpeg12_macroblocks
);
842 if (surface
!= buffer
->surface
) {
843 pipe_surface_reference(&buffer
->surface
, surface
);
844 pipe_surface_reference(&buffer
->past
, past
);
845 pipe_surface_reference(&buffer
->future
, future
);
846 buffer
->fence
= fence
;
848 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
849 assert(buffer
->past
== past
);
850 assert(buffer
->future
== future
);
853 for (i
= 0; i
< num_macroblocks
; ++i
) {
854 assert(mpeg12_macroblocks
[i
].base
.codec
== PIPE_VIDEO_CODEC_MPEG12
);
855 grab_macroblock(renderer
, buffer
, &mpeg12_macroblocks
[i
]);
860 vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer
*renderer
, struct vl_mpeg12_mc_buffer
*buffer
)
862 assert(renderer
&& buffer
);
864 vl_idct_unmap_buffers(&renderer
->idct_y
, &buffer
->idct_y
);
865 vl_idct_unmap_buffers(&renderer
->idct_cr
, &buffer
->idct_cr
);
866 vl_idct_unmap_buffers(&renderer
->idct_cb
, &buffer
->idct_cb
);
868 vl_vb_unmap(&buffer
->vertex_stream
, renderer
->pipe
);
872 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer
*renderer
, struct vl_mpeg12_mc_buffer
*buffer
)
874 unsigned num_not_empty
, num_empty
;
875 assert(renderer
&& buffer
);
877 num_not_empty
= buffer
->vertex_stream
.num_not_empty
;
878 num_empty
= buffer
->vertex_stream
.num_empty
;
880 if (num_not_empty
== 0 && num_empty
== 0)
883 vl_idct_flush(&renderer
->idct_y
, &buffer
->idct_y
, num_not_empty
);
884 vl_idct_flush(&renderer
->idct_cr
, &buffer
->idct_cr
, num_not_empty
);
885 vl_idct_flush(&renderer
->idct_cb
, &buffer
->idct_cb
, num_not_empty
);
887 renderer
->fb_state
.cbufs
[0] = buffer
->surface
;
888 renderer
->pipe
->bind_rasterizer_state(renderer
->pipe
, renderer
->rs_state
);
889 renderer
->pipe
->set_framebuffer_state(renderer
->pipe
, &renderer
->fb_state
);
890 renderer
->pipe
->set_viewport_state(renderer
->pipe
, &renderer
->viewport
);
891 renderer
->pipe
->set_vertex_buffers(renderer
->pipe
, 2, buffer
->vertex_bufs
.all
);
892 renderer
->pipe
->bind_vertex_elements_state(renderer
->pipe
, renderer
->vertex_elems_state
);
895 buffer
->sampler_views
.individual
.ref
[0] = find_or_create_sampler_view(renderer
, buffer
->past
);
897 buffer
->sampler_views
.individual
.ref
[0] = find_or_create_sampler_view(renderer
, buffer
->surface
);
900 if (buffer
->future
) {
901 buffer
->sampler_views
.individual
.ref
[1] = find_or_create_sampler_view(renderer
, buffer
->future
);
903 buffer
->sampler_views
.individual
.ref
[1] = find_or_create_sampler_view(renderer
, buffer
->surface
);
906 renderer
->pipe
->set_fragment_sampler_views(renderer
->pipe
, 5, buffer
->sampler_views
.all
);
907 renderer
->pipe
->bind_fragment_sampler_states(renderer
->pipe
, 5, renderer
->samplers
.all
);
909 renderer
->pipe
->bind_vs_state(renderer
->pipe
, renderer
->vs
);
910 renderer
->pipe
->bind_fs_state(renderer
->pipe
, renderer
->fs
);
912 if (num_not_empty
> 0)
913 util_draw_arrays_instanced(renderer
->pipe
, PIPE_PRIM_QUADS
, 0, 4, 0, num_not_empty
);
916 util_draw_arrays_instanced(renderer
->pipe
, PIPE_PRIM_QUADS
, 0, 4,
917 buffer
->vertex_stream
.size
- num_empty
, num_empty
);
919 renderer
->pipe
->flush(renderer
->pipe
, PIPE_FLUSH_RENDER_CACHE
, buffer
->fence
);
921 /* Next time we get this surface it may have new ref frames */
922 pipe_surface_reference(&buffer
->surface
, NULL
);
923 pipe_surface_reference(&buffer
->past
, NULL
);
924 pipe_surface_reference(&buffer
->future
, NULL
);
926 vl_vb_restart(&buffer
->vertex_stream
);