1 /**************************************************************************
3 * Copyright 2009 Younes Manton.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
40 #define DEFAULT_BUF_ALIGNMENT 1
41 #define MACROBLOCK_WIDTH 16
42 #define MACROBLOCK_HEIGHT 16
44 #define BLOCK_HEIGHT 8
46 struct vertex_stream_0
99 create_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
101 struct ureg_program
*shader
;
102 struct ureg_src scale
;
103 struct ureg_src vrect
, vpos
, eb
[2][2], vmv
[4];
104 struct ureg_src interlaced
, frame_pred
, ref_frames
, bkwd_pred
;
105 struct ureg_dst t_vpos
, t_vtex
, t_vmv
;
106 struct ureg_dst o_vpos
, o_line
, o_vtex
[3], o_eb
[2], o_vmv
[4];
107 struct ureg_dst o_frame_pred
, o_ref_frames
, o_bkwd_pred
;
110 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
114 t_vpos
= ureg_DECL_temporary(shader
);
115 t_vtex
= ureg_DECL_temporary(shader
);
116 t_vmv
= ureg_DECL_temporary(shader
);
118 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
119 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
120 eb
[0][0] = ureg_DECL_vs_input(shader
, VS_I_EB_0_0
);
121 eb
[1][0] = ureg_DECL_vs_input(shader
, VS_I_EB_1_0
);
122 eb
[0][1] = ureg_DECL_vs_input(shader
, VS_I_EB_0_1
);
123 eb
[1][1] = ureg_DECL_vs_input(shader
, VS_I_EB_1_1
);
124 interlaced
= ureg_DECL_vs_input(shader
, VS_I_INTERLACED
);
125 frame_pred
= ureg_DECL_vs_input(shader
, VS_I_FRAME_PRED
);
126 ref_frames
= ureg_DECL_vs_input(shader
, VS_I_REF_FRAMES
);
127 bkwd_pred
= ureg_DECL_vs_input(shader
, VS_I_BKWD_PRED
);
129 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
130 o_line
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_LINE
);
131 o_vtex
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX0
);
132 o_vtex
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX1
);
133 o_vtex
[2] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX2
);
134 o_eb
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_0
);
135 o_eb
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_1
);
136 o_frame_pred
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_FRAME_PRED
);
137 o_ref_frames
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_REF_FRAMES
);
138 o_bkwd_pred
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_BKWD_PRED
);
140 for (i
= 0; i
< 4; ++i
) {
141 vmv
[i
] = ureg_DECL_vs_input(shader
, VS_I_MV0
+ i
);
142 o_vmv
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_MV0
+ i
);
146 * scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
148 * t_vpos = (vpos + vrect) * scale
152 * o_line.xy = vrect * 8
153 * o_line.z = interlaced
157 * t_vtex.y = vrect.y * 0.5
160 * o_vtex[0].xy = t_vtex * scale
163 * o_vtex[1].xy = t_vtex * scale
165 * o_vtex[0..1].xy = t_vpos
167 * o_vtex[2].xy = t_vpos
169 * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
171 * o_frame_pred = frame_pred
172 * o_ref_frames = ref_frames
173 * o_bkwd_pred = bkwd_pred
175 * // Apply motion vectors
176 * scale = 0.5 / (dst.width, dst.height);
177 * o_vmv[0..count] = t_vpos + vmv[0..count] * scale
180 scale
= ureg_imm2f(shader
,
181 (float)MACROBLOCK_WIDTH
/ r
->buffer_width
,
182 (float)MACROBLOCK_HEIGHT
/ r
->buffer_height
);
184 ureg_ADD(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
185 ureg_MUL(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
), scale
);
186 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
187 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), vpos
);
189 ureg_MUL(shader
, ureg_writemask(o_line
, TGSI_WRITEMASK_XY
), vrect
,
190 ureg_imm2f(shader
, MACROBLOCK_WIDTH
/ 2, MACROBLOCK_HEIGHT
/ 2));
191 ureg_MOV(shader
, ureg_writemask(o_line
, TGSI_WRITEMASK_Z
),
192 ureg_scalar(interlaced
, TGSI_SWIZZLE_X
));
194 ureg_IF(shader
, interlaced
, &label
);
196 ureg_MOV(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_X
), vrect
);
197 ureg_MUL(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_Y
), vrect
, ureg_imm1f(shader
, 0.5f
));
198 ureg_ADD(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_XY
), vpos
, ureg_src(t_vtex
));
199 ureg_MUL(shader
, ureg_writemask(o_vtex
[0], TGSI_WRITEMASK_XY
), ureg_src(t_vtex
), scale
);
200 ureg_ADD(shader
, ureg_writemask(t_vtex
, TGSI_WRITEMASK_Y
), ureg_src(t_vtex
), ureg_imm1f(shader
, 0.5f
));
201 ureg_MUL(shader
, ureg_writemask(o_vtex
[1], TGSI_WRITEMASK_XY
), ureg_src(t_vtex
), scale
);
203 ureg_ELSE(shader
, &label
);
205 ureg_MOV(shader
, ureg_writemask(o_vtex
[0], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
206 ureg_MOV(shader
, ureg_writemask(o_vtex
[1], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
209 ureg_MOV(shader
, ureg_writemask(o_vtex
[2], TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
211 ureg_CMP(shader
, ureg_writemask(o_eb
[0], TGSI_WRITEMASK_XYZ
),
212 ureg_negate(ureg_scalar(vrect
, TGSI_SWIZZLE_X
)),
214 ureg_CMP(shader
, ureg_writemask(o_eb
[1], TGSI_WRITEMASK_XYZ
),
215 ureg_negate(ureg_scalar(vrect
, TGSI_SWIZZLE_X
)),
218 ureg_MOV(shader
, ureg_writemask(o_frame_pred
, TGSI_WRITEMASK_X
), frame_pred
);
219 ureg_MOV(shader
, ureg_writemask(o_ref_frames
, TGSI_WRITEMASK_X
), ref_frames
);
220 ureg_MOV(shader
, ureg_writemask(o_bkwd_pred
, TGSI_WRITEMASK_X
), bkwd_pred
);
222 scale
= ureg_imm2f(shader
,
223 0.5f
/ r
->buffer_width
,
224 0.5f
/ r
->buffer_height
);
226 for (i
= 0; i
< 4; i
++)
227 ureg_MAD(shader
, ureg_writemask(o_vmv
[i
], TGSI_WRITEMASK_XY
), scale
, vmv
[i
], ureg_src(t_vpos
));
229 ureg_release_temporary(shader
, t_vtex
);
230 ureg_release_temporary(shader
, t_vpos
);
231 ureg_release_temporary(shader
, t_vmv
);
235 return ureg_create_shader_and_destroy(shader
, r
->pipe
);
238 static struct ureg_dst
239 calc_field(struct ureg_program
*shader
)
242 struct ureg_src line
, frame_pred
;
244 tmp
= ureg_DECL_temporary(shader
);
246 line
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_LINE
, TGSI_INTERPOLATE_LINEAR
);
247 frame_pred
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_FRAME_PRED
, TGSI_INTERPOLATE_CONSTANT
);
250 * line.xy going from 0 to 8 in steps of 0.5
251 * line.z flag that controls interlacing
253 * tmp.z = fraction(line.y)
254 * tmp.z = tmp.z >= 0.5 ? 1 : 0
255 * tmp.xy = line >= 4 ? 1 : 0
256 * tmp.w = line.z ? tmp.z : tmp.y
257 * tmp.z = frame_pred ? 0.0f : tmp.z
259 ureg_FRC(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_scalar(line
, TGSI_SWIZZLE_Y
));
260 ureg_SGE(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
), ureg_src(tmp
), ureg_imm1f(shader
, 0.5f
));
261 ureg_SGE(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), line
, ureg_imm2f(shader
, BLOCK_WIDTH
/ 2, BLOCK_HEIGHT
/ 2));
263 ureg_CMP(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
),
264 ureg_negate(ureg_scalar(line
, TGSI_SWIZZLE_Z
)),
265 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
),
266 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
268 ureg_CMP(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Z
),
269 ureg_negate(ureg_scalar(frame_pred
, TGSI_SWIZZLE_X
)),
270 ureg_imm1f(shader
, 0.0f
),
271 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Z
));
276 static struct ureg_dst
277 fetch_ycbcr(struct vl_mpeg12_mc_renderer
*r
, struct ureg_program
*shader
, struct ureg_dst field
)
279 struct ureg_src tc
[3], sampler
[3], eb
[2];
280 struct ureg_dst texel
, t_tc
, t_eb_info
, tmp
;
283 texel
= ureg_DECL_temporary(shader
);
284 t_tc
= ureg_DECL_temporary(shader
);
285 t_eb_info
= ureg_DECL_temporary(shader
);
286 tmp
= ureg_DECL_temporary(shader
);
288 tc
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX0
, TGSI_INTERPOLATE_LINEAR
);
289 tc
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX1
, TGSI_INTERPOLATE_LINEAR
);
290 tc
[2] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX2
, TGSI_INTERPOLATE_LINEAR
);
292 eb
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_0
, TGSI_INTERPOLATE_CONSTANT
);
293 eb
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_EB_1
, TGSI_INTERPOLATE_CONSTANT
);
295 for (i
= 0; i
< 3; ++i
) {
296 sampler
[i
] = ureg_DECL_sampler(shader
, i
);
300 * texel.y = tex(field.w ? tc[1] : tc[0], sampler[0])
301 * texel.cb = tex(tc[2], sampler[1])
302 * texel.cr = tex(tc[2], sampler[2])
305 ureg_CMP(shader
, ureg_writemask(t_tc
, TGSI_WRITEMASK_XY
),
306 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_W
)),
309 ureg_CMP(shader
, ureg_writemask(t_eb_info
, TGSI_WRITEMASK_XYZ
),
310 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_W
)),
313 /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
314 ureg_SGE(shader
, ureg_writemask(t_eb_info
, TGSI_WRITEMASK_XYZ
), ureg_src(t_eb_info
), ureg_imm1f(shader
, 0.5f
));
316 for (i
= 0; i
< 3; ++i
) {
317 ureg_IF(shader
, ureg_scalar(ureg_src(t_eb_info
), TGSI_SWIZZLE_X
+ i
), &label
);
318 ureg_MOV(shader
, ureg_writemask(texel
, TGSI_WRITEMASK_X
<< i
), ureg_imm1f(shader
, 0.0f
));
319 ureg_ELSE(shader
, &label
);
321 /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
322 if(i
==0 || r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
) {
323 ureg_TEX(shader
, tmp
, TGSI_TEXTURE_3D
, ureg_src(t_tc
), sampler
[i
]);
325 ureg_TEX(shader
, tmp
, TGSI_TEXTURE_3D
, tc
[2], sampler
[i
]);
328 ureg_MOV(shader
, ureg_writemask(texel
, TGSI_WRITEMASK_X
<< i
), ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
));
333 ureg_release_temporary(shader
, t_tc
);
334 ureg_release_temporary(shader
, t_eb_info
);
335 ureg_release_temporary(shader
, tmp
);
340 static struct ureg_dst
341 fetch_ref(struct ureg_program
*shader
, struct ureg_dst field
)
343 struct ureg_src ref_frames
, bkwd_pred
;
344 struct ureg_src tc
[4], sampler
[2];
345 struct ureg_dst ref
[2], t_tc
, result
;
346 unsigned i
, intra_label
, bi_label
, label
;
348 ref_frames
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_REF_FRAMES
, TGSI_INTERPOLATE_CONSTANT
);
349 bkwd_pred
= ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_BKWD_PRED
, TGSI_INTERPOLATE_CONSTANT
);
351 for (i
= 0; i
< 4; ++i
)
352 tc
[i
] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_MV0
+ i
, TGSI_INTERPOLATE_LINEAR
);
354 for (i
= 0; i
< 2; ++i
) {
355 sampler
[i
] = ureg_DECL_sampler(shader
, i
+ 3);
356 ref
[i
] = ureg_DECL_temporary(shader
);
359 result
= ureg_DECL_temporary(shader
);
361 ureg_SEQ(shader
, ureg_writemask(result
, TGSI_WRITEMASK_X
), ref_frames
, ureg_imm1f(shader
, -1.0f
));
362 ureg_IF(shader
, ureg_scalar(ureg_src(result
), TGSI_SWIZZLE_X
), &intra_label
);
363 ureg_MOV(shader
, result
, ureg_imm1f(shader
, 0.5f
));
365 ureg_ELSE(shader
, &intra_label
);
366 ureg_IF(shader
, ureg_scalar(ref_frames
, TGSI_SWIZZLE_X
), &bi_label
);
368 t_tc
= ureg_DECL_temporary(shader
);
370 * result = tex(field.z ? tc[1] : tc[0], sampler[0])
372 ureg_CMP(shader
, ureg_writemask(t_tc
, TGSI_WRITEMASK_XY
),
373 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_Z
)),
376 ureg_IF(shader
, bkwd_pred
, &label
);
377 ureg_TEX(shader
, result
, TGSI_TEXTURE_2D
, ureg_src(t_tc
), sampler
[1]);
378 ureg_ELSE(shader
, &label
);
379 ureg_TEX(shader
, result
, TGSI_TEXTURE_2D
, ureg_src(t_tc
), sampler
[0]);
382 ureg_release_temporary(shader
, t_tc
);
384 ureg_ELSE(shader
, &bi_label
);
386 t_tc
= ureg_DECL_temporary(shader
);
390 * ref[0..1] = tex(tc[0..1], sampler[0..1])
392 * ref[0..1] = tex(tc[2..3], sampler[0..1])
394 ureg_CMP(shader
, ureg_writemask(t_tc
, TGSI_WRITEMASK_XY
),
395 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_Z
)),
397 ureg_TEX(shader
, ref
[0], TGSI_TEXTURE_2D
, ureg_src(t_tc
), sampler
[0]);
399 ureg_CMP(shader
, ureg_writemask(t_tc
, TGSI_WRITEMASK_XY
),
400 ureg_negate(ureg_scalar(ureg_src(field
), TGSI_SWIZZLE_Z
)),
402 ureg_TEX(shader
, ref
[1], TGSI_TEXTURE_2D
, ureg_src(t_tc
), sampler
[1]);
404 ureg_release_temporary(shader
, t_tc
);
406 ureg_LRP(shader
, result
, ureg_scalar(ureg_imm1f(shader
, 0.5f
), TGSI_SWIZZLE_X
), ureg_src(ref
[0]), ureg_src(ref
[1]));
411 for (i
= 0; i
< 2; ++i
)
412 ureg_release_temporary(shader
, ref
[i
]);
418 create_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
420 struct ureg_program
*shader
;
421 struct ureg_dst result
;
422 struct ureg_dst field
, texel
;
423 struct ureg_dst fragment
;
425 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
429 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
431 field
= calc_field(shader
);
432 texel
= fetch_ycbcr(r
, shader
, field
);
434 result
= fetch_ref(shader
, field
);
436 ureg_ADD(shader
, fragment
, ureg_src(texel
), ureg_src(result
));
438 ureg_release_temporary(shader
, field
);
439 ureg_release_temporary(shader
, texel
);
440 ureg_release_temporary(shader
, result
);
443 return ureg_create_shader_and_destroy(shader
, r
->pipe
);
447 init_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
449 struct pipe_sampler_state sampler
;
450 struct pipe_rasterizer_state rs_state
;
456 r
->viewport
.scale
[0] = r
->buffer_width
;
457 r
->viewport
.scale
[1] = r
->buffer_height
;
458 r
->viewport
.scale
[2] = 1;
459 r
->viewport
.scale
[3] = 1;
460 r
->viewport
.translate
[0] = 0;
461 r
->viewport
.translate
[1] = 0;
462 r
->viewport
.translate
[2] = 0;
463 r
->viewport
.translate
[3] = 0;
465 r
->fb_state
.width
= r
->buffer_width
;
466 r
->fb_state
.height
= r
->buffer_height
;
467 r
->fb_state
.nr_cbufs
= 1;
468 r
->fb_state
.zsbuf
= NULL
;
471 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
473 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
|| true) { //TODO
474 filters
[1] = PIPE_TEX_FILTER_NEAREST
;
475 filters
[2] = PIPE_TEX_FILTER_NEAREST
;
478 filters
[1] = PIPE_TEX_FILTER_LINEAR
;
479 filters
[2] = PIPE_TEX_FILTER_LINEAR
;
481 /* Fwd, bkwd ref filters */
482 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
483 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
485 for (i
= 0; i
< 5; ++i
) {
486 memset(&sampler
, 0, sizeof(sampler
));
487 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
488 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
489 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_BORDER
;
490 sampler
.min_img_filter
= filters
[i
];
491 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
492 sampler
.mag_img_filter
= filters
[i
];
493 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
494 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
495 sampler
.normalized_coords
= 1;
496 /*sampler.shadow_ambient = ; */
497 /*sampler.lod_bias = ; */
499 /*sampler.max_lod = ; */
500 sampler
.border_color
[0] = 0.0f
;
501 sampler
.border_color
[1] = 0.0f
;
502 sampler
.border_color
[2] = 0.0f
;
503 sampler
.border_color
[3] = 0.0f
;
504 /*sampler.max_anisotropy = ; */
505 r
->samplers
.all
[i
] = r
->pipe
->create_sampler_state(r
->pipe
, &sampler
);
508 memset(&rs_state
, 0, sizeof(rs_state
));
509 /*rs_state.sprite_coord_enable */
510 rs_state
.sprite_coord_mode
= PIPE_SPRITE_COORD_UPPER_LEFT
;
511 rs_state
.point_quad_rasterization
= true;
512 rs_state
.point_size
= BLOCK_WIDTH
;
513 rs_state
.gl_rasterization_rules
= true;
514 r
->rs_state
= r
->pipe
->create_rasterizer_state(r
->pipe
, &rs_state
);
520 cleanup_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
526 for (i
= 0; i
< 5; ++i
)
527 r
->pipe
->delete_sampler_state(r
->pipe
, r
->samplers
.all
[i
]);
529 r
->pipe
->delete_rasterizer_state(r
->pipe
, r
->rs_state
);
533 init_buffers(struct vl_mpeg12_mc_renderer
*r
)
535 struct pipe_resource
*idct_matrix
;
536 struct pipe_resource
template;
537 struct pipe_vertex_element vertex_elems
[NUM_VS_INPUTS
];
538 struct pipe_sampler_view sampler_view
;
541 align(r
->buffer_width
, MACROBLOCK_WIDTH
) / MACROBLOCK_WIDTH
;
543 align(r
->buffer_height
, MACROBLOCK_HEIGHT
) / MACROBLOCK_HEIGHT
;
549 r
->macroblocks_per_batch
=
550 mbw
* (r
->bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
? mbh
: 1);
551 r
->num_macroblocks
= 0;
553 memset(&template, 0, sizeof(struct pipe_resource
));
554 template.target
= PIPE_TEXTURE_2D
;
555 /* TODO: Accommodate HW that can't do this and also for cases when this isn't precise enough */
556 template.format
= PIPE_FORMAT_R16_SNORM
;
557 template.last_level
= 0;
558 template.width0
= r
->buffer_width
;
559 template.height0
= r
->buffer_height
;
561 template.usage
= PIPE_USAGE_DYNAMIC
;
562 template.bind
= PIPE_BIND_SAMPLER_VIEW
;
565 r
->textures
.individual
.y
= r
->pipe
->screen
->resource_create(r
->pipe
->screen
, &template);
567 if (!(idct_matrix
= vl_idct_upload_matrix(r
->pipe
)))
570 if (!vl_idct_init(&r
->idct_luma
, r
->pipe
, r
->buffer_width
, r
->buffer_height
, idct_matrix
))
573 if (!vl_idct_init_buffer(&r
->idct_luma
, &r
->idct_y
, r
->textures
.individual
.y
))
576 vl_idct_map_buffers(&r
->idct_luma
, &r
->idct_y
);
578 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
) {
579 template.width0
= r
->buffer_width
/ 2;
580 template.height0
= r
->buffer_height
/ 2;
582 else if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
)
583 template.height0
= r
->buffer_height
/ 2;
585 r
->textures
.individual
.cb
=
586 r
->pipe
->screen
->resource_create(r
->pipe
->screen
, &template);
587 r
->textures
.individual
.cr
=
588 r
->pipe
->screen
->resource_create(r
->pipe
->screen
, &template);
590 if(!vl_idct_init(&r
->idct_chroma
, r
->pipe
, template.width0
, template.height0
, idct_matrix
))
593 if (!vl_idct_init_buffer(&r
->idct_chroma
, &r
->idct_cb
, r
->textures
.individual
.cb
))
596 vl_idct_map_buffers(&r
->idct_chroma
, &r
->idct_cb
);
598 if (!vl_idct_init_buffer(&r
->idct_chroma
, &r
->idct_cr
, r
->textures
.individual
.cr
))
601 vl_idct_map_buffers(&r
->idct_chroma
, &r
->idct_cr
);
603 for (i
= 0; i
< 3; ++i
) {
604 u_sampler_view_default_template(&sampler_view
,
606 r
->textures
.all
[i
]->format
);
607 r
->sampler_views
.all
[i
] = r
->pipe
->create_sampler_view(r
->pipe
, r
->textures
.all
[i
], &sampler_view
);
610 memset(&vertex_elems
, 0, sizeof(vertex_elems
));
612 vertex_elems
[VS_I_RECT
] = vl_vb_get_quad_vertex_element();
613 r
->vertex_bufs
.individual
.quad
= vl_vb_upload_quads(r
->pipe
, r
->macroblocks_per_batch
);
615 /* Position element */
616 vertex_elems
[VS_I_VPOS
].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
618 /* y, cr, cb empty block element top left block */
619 vertex_elems
[VS_I_EB_0_0
].src_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
621 /* y, cr, cb empty block element top right block */
622 vertex_elems
[VS_I_EB_0_1
].src_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
624 /* y, cr, cb empty block element bottom left block */
625 vertex_elems
[VS_I_EB_1_0
].src_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
627 /* y, cr, cb empty block element bottom right block */
628 vertex_elems
[VS_I_EB_1_1
].src_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
630 /* progressive=0.0f interlaced=1.0f */
631 vertex_elems
[VS_I_INTERLACED
].src_format
= PIPE_FORMAT_R32_FLOAT
;
633 /* frame=1.0f field=0.0f */
634 vertex_elems
[VS_I_FRAME_PRED
].src_format
= PIPE_FORMAT_R32_FLOAT
;
636 /* intra=-1.0f forward/backward=1.0f bi=0.0f */
637 vertex_elems
[VS_I_REF_FRAMES
].src_format
= PIPE_FORMAT_R32_FLOAT
;
639 /* forward=0.0f backward=1.0f */
640 vertex_elems
[VS_I_BKWD_PRED
].src_format
= PIPE_FORMAT_R32_FLOAT
;
642 stride
= vl_vb_element_helper(&vertex_elems
[VS_I_VPOS
], 9, 1);
644 r
->vertex_bufs
.individual
.pos
= vl_vb_init(
645 &r
->pos
, r
->pipe
, r
->macroblocks_per_batch
,
646 sizeof(struct vertex_stream_0
) / sizeof(float),
649 for (i
= 0; i
< 4; ++i
) {
650 /* motion vector 0..3 element */
651 vertex_elems
[VS_I_MV0
+ i
].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
652 stride
= vl_vb_element_helper(&vertex_elems
[VS_I_MV0
+ i
], 1, i
+ 2);
653 r
->vertex_bufs
.individual
.mv
[i
] = vl_vb_init(
654 &r
->mv
[i
], r
->pipe
, r
->macroblocks_per_batch
,
655 sizeof(struct vertex2f
) / sizeof(float),
659 r
->vertex_elems_state
= r
->pipe
->create_vertex_elements_state(
660 r
->pipe
, NUM_VS_INPUTS
, vertex_elems
);
662 if (r
->vertex_elems_state
== NULL
)
665 r
->vs
= create_vert_shader(r
);
666 r
->fs
= create_frag_shader(r
);
668 if (r
->vs
== NULL
|| r
->fs
== NULL
)
675 cleanup_buffers(struct vl_mpeg12_mc_renderer
*r
)
681 for (i
= 0; i
< 3; ++i
) {
682 pipe_sampler_view_reference(&r
->sampler_views
.all
[i
], NULL
);
683 pipe_resource_reference(&r
->vertex_bufs
.all
[i
].buffer
, NULL
);
684 pipe_resource_reference(&r
->textures
.all
[i
], NULL
);
687 r
->pipe
->delete_vs_state(r
->pipe
, r
->vs
);
688 r
->pipe
->delete_fs_state(r
->pipe
, r
->fs
);
690 vl_vb_cleanup(&r
->pos
);
692 for (i
= 0; i
< 4; ++i
)
693 vl_vb_cleanup(&r
->mv
[i
]);
695 vl_idct_unmap_buffers(&r
->idct_luma
, &r
->idct_y
);
696 vl_idct_unmap_buffers(&r
->idct_chroma
, &r
->idct_cb
);
697 vl_idct_unmap_buffers(&r
->idct_chroma
, &r
->idct_cr
);
699 vl_idct_cleanup_buffer(&r
->idct_luma
, &r
->idct_y
);
700 vl_idct_cleanup_buffer(&r
->idct_chroma
, &r
->idct_cb
);
701 vl_idct_cleanup_buffer(&r
->idct_chroma
, &r
->idct_cr
);
703 vl_idct_cleanup(&r
->idct_luma
);
704 vl_idct_cleanup(&r
->idct_chroma
);
706 r
->pipe
->delete_vertex_elements_state(r
->pipe
, r
->vertex_elems_state
);
709 static struct pipe_sampler_view
710 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer
*r
, struct pipe_surface
*surface
)
712 struct pipe_sampler_view
*sampler_view
;
716 sampler_view
= (struct pipe_sampler_view
*)util_keymap_lookup(r
->texview_map
, &surface
);
718 struct pipe_sampler_view templat
;
719 boolean added_to_map
;
721 u_sampler_view_default_template(&templat
, surface
->texture
,
722 surface
->texture
->format
);
723 sampler_view
= r
->pipe
->create_sampler_view(r
->pipe
, surface
->texture
,
728 added_to_map
= util_keymap_insert(r
->texview_map
, &surface
,
729 sampler_view
, r
->pipe
);
730 assert(added_to_map
);
737 get_motion_vectors(struct pipe_mpeg12_macroblock
*mb
, struct vertex2f mv
[4])
739 switch (mb
->mb_type
) {
740 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
742 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
744 mv
[2].x
= mb
->pmv
[0][1][0];
745 mv
[2].y
= mb
->pmv
[0][1][1];
748 mv
[2].x
= mb
->pmv
[0][1][0];
749 mv
[2].y
= mb
->pmv
[0][1][1] - (mb
->pmv
[0][1][1] % 4);
751 mv
[3].x
= mb
->pmv
[1][1][0];
752 mv
[3].y
= mb
->pmv
[1][1][1] - (mb
->pmv
[1][1][1] % 4);
754 if(mb
->mvfs
[0][1]) mv
[2].y
+= 2;
755 if(!mb
->mvfs
[1][1]) mv
[3].y
-= 2;
760 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
761 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
763 if (mb
->mb_type
== PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
) {
765 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
766 mv
[0].x
= mb
->pmv
[0][1][0];
767 mv
[0].y
= mb
->pmv
[0][1][1];
770 mv
[0].x
= mb
->pmv
[0][1][0];
771 mv
[0].y
= mb
->pmv
[0][1][1] - (mb
->pmv
[0][1][1] % 4);
773 mv
[1].x
= mb
->pmv
[1][1][0];
774 mv
[1].y
= mb
->pmv
[1][1][1] - (mb
->pmv
[1][1][1] % 4);
776 if(mb
->mvfs
[0][1]) mv
[0].y
+= 2;
777 if(!mb
->mvfs
[1][1]) mv
[1].y
-= 2;
782 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
783 mv
[0].x
= mb
->pmv
[0][0][0];
784 mv
[0].y
= mb
->pmv
[0][0][1];
787 mv
[0].x
= mb
->pmv
[0][0][0];
788 mv
[0].y
= mb
->pmv
[0][0][1] - (mb
->pmv
[0][0][1] % 4);
790 mv
[1].x
= mb
->pmv
[1][0][0];
791 mv
[1].y
= mb
->pmv
[1][0][1] - (mb
->pmv
[1][0][1] % 4);
793 if(mb
->mvfs
[0][0]) mv
[0].y
+= 2;
794 if(!mb
->mvfs
[1][0]) mv
[1].y
-= 2;
804 empty_block(enum pipe_video_chroma_format chroma_format
,
805 unsigned cbp
, unsigned component
,
806 unsigned x
, unsigned y
)
808 /* TODO: Implement 422, 444 */
809 assert(chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
811 if(component
== 0) /*luma*/
812 return !(cbp
& (1 << (5 - (x
+ y
* 2))));
814 return !(cbp
& (1 << (2 - component
)));
818 grab_vectors(struct vl_mpeg12_mc_renderer
*r
,
819 struct pipe_mpeg12_macroblock
*mb
)
821 struct vertex2f mv
[4];
822 struct vertex_stream_0 info
;
829 info
.pos
.x
= mb
->mbx
;
830 info
.pos
.y
= mb
->mby
;
831 for ( i
= 0; i
< 2; ++i
) {
832 for ( j
= 0; j
< 2; ++j
) {
833 info
.eb
[i
][j
].y
= empty_block(r
->chroma_format
, mb
->cbp
, 0, j
, i
);
834 info
.eb
[i
][j
].cr
= empty_block(r
->chroma_format
, mb
->cbp
, 1, j
, i
);
835 info
.eb
[i
][j
].cb
= empty_block(r
->chroma_format
, mb
->cbp
, 2, j
, i
);
838 info
.interlaced
= mb
->dct_type
== PIPE_MPEG12_DCT_TYPE_FIELD
? 1.0f
: 0.0f
;
839 info
.frame_pred
= mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
? 1.0f
: 0.0f
;
840 info
.bkwd_pred
= mb
->mb_type
== PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
? 1.0f
: 0.0f
;
841 switch (mb
->mb_type
) {
842 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
843 info
.ref_frames
= -1.0f
;
846 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
847 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
848 info
.ref_frames
= 1.0f
;
851 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
852 info
.ref_frames
= 0.0f
;
859 vl_vb_add_block(&r
->pos
, (float*)&info
);
861 get_motion_vectors(mb
, mv
);
862 for ( j
= 0; j
< 4; ++j
)
863 vl_vb_add_block(&r
->mv
[j
], (float*)&mv
[j
]);
867 grab_blocks(struct vl_mpeg12_mc_renderer
*r
, unsigned mbx
, unsigned mby
,
868 enum pipe_mpeg12_dct_type dct_type
, unsigned cbp
, short *blocks
)
876 for (y
= 0; y
< 2; ++y
) {
877 for (x
= 0; x
< 2; ++x
, ++tb
) {
878 if (!empty_block(r
->chroma_format
, cbp
, 0, x
, y
)) {
879 vl_idct_add_block(&r
->idct_y
, mbx
* 2 + x
, mby
* 2 + y
, blocks
);
880 blocks
+= BLOCK_WIDTH
* BLOCK_HEIGHT
;
885 /* TODO: Implement 422, 444 */
886 assert(r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
888 for (tb
= 1; tb
< 3; ++tb
) {
889 if (!empty_block(r
->chroma_format
, cbp
, tb
, 0, 0)) {
891 vl_idct_add_block(&r
->idct_cb
, mbx
, mby
, blocks
);
893 vl_idct_add_block(&r
->idct_cr
, mbx
, mby
, blocks
);
894 blocks
+= BLOCK_WIDTH
* BLOCK_HEIGHT
;
900 grab_macroblock(struct vl_mpeg12_mc_renderer
*r
,
901 struct pipe_mpeg12_macroblock
*mb
)
906 assert(r
->num_macroblocks
< r
->macroblocks_per_batch
);
909 grab_blocks(r
, mb
->mbx
, mb
->mby
, mb
->dct_type
, mb
->cbp
, mb
->blocks
);
911 ++r
->num_macroblocks
;
915 texview_map_delete(const struct keymap
*map
,
916 const void *key
, void *data
,
919 struct pipe_sampler_view
*sv
= (struct pipe_sampler_view
*)data
;
926 pipe_sampler_view_reference(&sv
, NULL
);
930 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer
*renderer
,
931 struct pipe_context
*pipe
,
932 unsigned buffer_width
,
933 unsigned buffer_height
,
934 enum pipe_video_chroma_format chroma_format
,
935 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode
)
940 /* TODO: Implement other policies */
941 assert(bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
);
943 memset(renderer
, 0, sizeof(struct vl_mpeg12_mc_renderer
));
945 renderer
->pipe
= pipe
;
946 renderer
->buffer_width
= buffer_width
;
947 renderer
->buffer_height
= buffer_height
;
948 renderer
->chroma_format
= chroma_format
;
949 renderer
->bufmode
= bufmode
;
951 renderer
->texview_map
= util_new_keymap(sizeof(struct pipe_surface
*), -1,
953 if (!renderer
->texview_map
)
956 if (!init_pipe_state(renderer
))
957 goto error_pipe_state
;
959 if (!init_buffers(renderer
))
962 renderer
->surface
= NULL
;
963 renderer
->past
= NULL
;
964 renderer
->future
= NULL
;
965 renderer
->num_macroblocks
= 0;
970 cleanup_pipe_state(renderer
);
973 util_delete_keymap(renderer
->texview_map
, renderer
->pipe
);
978 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer
*renderer
)
982 util_delete_keymap(renderer
->texview_map
, renderer
->pipe
);
983 cleanup_pipe_state(renderer
);
984 cleanup_buffers(renderer
);
986 pipe_surface_reference(&renderer
->surface
, NULL
);
987 pipe_surface_reference(&renderer
->past
, NULL
);
988 pipe_surface_reference(&renderer
->future
, NULL
);
992 vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer
*renderer
)
998 vl_idct_map_buffers(&renderer
->idct_luma
, &renderer
->idct_y
);
999 vl_idct_map_buffers(&renderer
->idct_chroma
, &renderer
->idct_cr
);
1000 vl_idct_map_buffers(&renderer
->idct_chroma
, &renderer
->idct_cb
);
1002 vl_vb_map(&renderer
->pos
, renderer
->pipe
);
1003 for(i
= 0; i
< 4; ++i
)
1004 vl_vb_map(&renderer
->mv
[i
], renderer
->pipe
);
1008 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
*renderer
,
1009 struct pipe_surface
*surface
,
1010 struct pipe_surface
*past
,
1011 struct pipe_surface
*future
,
1012 unsigned num_macroblocks
,
1013 struct pipe_mpeg12_macroblock
1014 *mpeg12_macroblocks
,
1015 struct pipe_fence_handle
**fence
)
1019 assert(num_macroblocks
);
1020 assert(mpeg12_macroblocks
);
1022 if (surface
!= renderer
->surface
) {
1023 pipe_surface_reference(&renderer
->surface
, surface
);
1024 pipe_surface_reference(&renderer
->past
, past
);
1025 pipe_surface_reference(&renderer
->future
, future
);
1026 renderer
->fence
= fence
;
1029 while (num_macroblocks
) {
1030 unsigned left_in_batch
= renderer
->macroblocks_per_batch
- renderer
->num_macroblocks
;
1031 unsigned num_to_submit
= MIN2(num_macroblocks
, left_in_batch
);
1034 for (i
= 0; i
< num_to_submit
; ++i
) {
1035 assert(mpeg12_macroblocks
[i
].base
.codec
== PIPE_VIDEO_CODEC_MPEG12
);
1036 grab_macroblock(renderer
, &mpeg12_macroblocks
[i
]);
1039 num_macroblocks
-= num_to_submit
;
1041 if (renderer
->num_macroblocks
== renderer
->macroblocks_per_batch
) {
1042 vl_mpeg12_mc_unmap_buffer(renderer
);
1043 vl_mpeg12_mc_renderer_flush(renderer
);
1044 vl_mpeg12_mc_map_buffer(renderer
);
1050 vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer
*renderer
)
1056 vl_idct_unmap_buffers(&renderer
->idct_luma
, &renderer
->idct_y
);
1057 vl_idct_unmap_buffers(&renderer
->idct_chroma
, &renderer
->idct_cr
);
1058 vl_idct_unmap_buffers(&renderer
->idct_chroma
, &renderer
->idct_cb
);
1060 vl_vb_unmap(&renderer
->pos
, renderer
->pipe
);
1061 for(i
= 0; i
< 4; ++i
)
1062 vl_vb_unmap(&renderer
->mv
[i
], renderer
->pipe
);
1066 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer
*renderer
)
1071 assert(renderer
->num_macroblocks
<= renderer
->macroblocks_per_batch
);
1073 if (renderer
->num_macroblocks
== 0)
1076 vl_idct_flush(&renderer
->idct_luma
, &renderer
->idct_y
);
1077 vl_idct_flush(&renderer
->idct_chroma
, &renderer
->idct_cr
);
1078 vl_idct_flush(&renderer
->idct_chroma
, &renderer
->idct_cb
);
1080 vl_vb_restart(&renderer
->pos
);
1081 for(i
= 0; i
< 4; ++i
)
1082 vl_vb_restart(&renderer
->mv
[i
]);
1084 renderer
->fb_state
.cbufs
[0] = renderer
->surface
;
1085 renderer
->pipe
->bind_rasterizer_state(renderer
->pipe
, renderer
->rs_state
);
1086 renderer
->pipe
->set_framebuffer_state(renderer
->pipe
, &renderer
->fb_state
);
1087 renderer
->pipe
->set_viewport_state(renderer
->pipe
, &renderer
->viewport
);
1088 renderer
->pipe
->set_vertex_buffers(renderer
->pipe
, 6, renderer
->vertex_bufs
.all
);
1089 renderer
->pipe
->bind_vertex_elements_state(renderer
->pipe
, renderer
->vertex_elems_state
);
1091 if (renderer
->past
) {
1092 renderer
->textures
.individual
.ref
[0] = renderer
->past
->texture
;
1093 renderer
->sampler_views
.individual
.ref
[0] = find_or_create_sampler_view(renderer
, renderer
->past
);
1096 if (renderer
->future
) {
1097 renderer
->textures
.individual
.ref
[1] = renderer
->future
->texture
;
1098 renderer
->sampler_views
.individual
.ref
[1] = find_or_create_sampler_view(renderer
, renderer
->future
);
1100 renderer
->pipe
->set_fragment_sampler_views(renderer
->pipe
, 5, renderer
->sampler_views
.all
);
1101 renderer
->pipe
->bind_fragment_sampler_states(renderer
->pipe
, 5, renderer
->samplers
.all
);
1103 renderer
->pipe
->bind_vs_state(renderer
->pipe
, renderer
->vs
);
1104 renderer
->pipe
->bind_fs_state(renderer
->pipe
, renderer
->fs
);
1105 util_draw_arrays(renderer
->pipe
, PIPE_PRIM_QUADS
, 0, renderer
->num_macroblocks
* 4);
1107 renderer
->pipe
->flush(renderer
->pipe
, PIPE_FLUSH_RENDER_CACHE
, renderer
->fence
);
1109 renderer
->num_macroblocks
= 0;