[g3dvl] use a single vertex buffer for both idct and mc
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <util/u_draw.h>
39 #include <tgsi/tgsi_ureg.h>
40
/* Size of one IDCT block in pixels. */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8
/* A macroblock spans two blocks in each direction (16x16 pixels). */
#define MACROBLOCK_WIDTH (2 * BLOCK_WIDTH)
#define MACROBLOCK_HEIGHT (2 * BLOCK_HEIGHT)
45
/*
 * Semantic indices used to link the vertex shader outputs to the fragment
 * shader inputs.  The values are sequential starting at zero; the texture
 * and motion vector slots have to stay contiguous because the motion
 * vector slots are addressed as VS_O_MV0 + i.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,
   VS_O_LINE = 1,
   VS_O_TEX0 = 2,
   VS_O_TEX1 = 3,
   VS_O_TEX2 = 4,
   VS_O_EB_0 = 5,
   VS_O_EB_1 = 6,
   VS_O_INFO = 7,
   VS_O_MV0 = 8,
   VS_O_MV1 = 9,
   VS_O_MV2 = 10,
   VS_O_MV3 = 11
};
61
/*
 * Coded block pattern bits for a 4:2:0 macroblock, indexed as
 * [component][y][x].  Component 0 has one distinct bit per block, while
 * components 1 and 2 repeat a single bit in every entry.
 */
static const unsigned const_empty_block_mask_420[3][2][2] = {
   { { 1u << 5, 1u << 4 }, { 1u << 3, 1u << 2 } },
   { { 1u << 1, 1u << 1 }, { 1u << 1, 1u << 1 } },
   { { 1u << 0, 1u << 0 }, { 1u << 0, 1u << 0 } }
};
67
68 static void *
69 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
70 {
71 struct ureg_program *shader;
72 struct ureg_src block_scale, mv_scale;
73 struct ureg_src vrect, vpos, eb[2][2], vmv[4];
74 struct ureg_dst t_vpos, t_vtex, t_vmv;
75 struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
76 unsigned i, label;
77
78 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
79 if (!shader)
80 return NULL;
81
82 t_vpos = ureg_DECL_temporary(shader);
83 t_vtex = ureg_DECL_temporary(shader);
84 t_vmv = ureg_DECL_temporary(shader);
85
86 vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
87 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
88 eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
89 eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
90 eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
91 eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
92
93 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
94 o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
95 o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
96 o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
97 o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
98 o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
99 o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
100 o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
101
102 for (i = 0; i < 4; ++i) {
103 vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
104 o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
105 }
106
107 /*
108 * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
109 * mv_scale = 0.5 / (dst.width, dst.height);
110 *
111 * t_vpos = (vpos + vrect) * block_scale
112 * o_vpos.xy = t_vpos
113 * o_vpos.zw = vpos
114 *
115 * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
116 *
117 * o_frame_pred = frame_pred
118 * o_info.x = ref_frames
119 * o_info.y = ref_frames > 0
120 * o_info.z = bkwd_pred
121 *
122 * // Apply motion vectors
123 * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale
124 *
125 * o_line.xy = vrect * 8
126 * o_line.z = interlaced
127 *
128 * if(eb[0][0].w) { //interlaced
129 * t_vtex.x = vrect.x
130 * t_vtex.y = vrect.y * 0.5
131 * t_vtex += vpos
132 *
133 * o_vtex[0].xy = t_vtex * block_scale
134 *
135 * t_vtex.y += 0.5
136 * o_vtex[1].xy = t_vtex * block_scale
137 * } else {
138 * o_vtex[0..1].xy = t_vpos
139 * }
140 * o_vtex[2].xy = t_vpos
141 *
142 */
143 block_scale = ureg_imm2f(shader,
144 (float)MACROBLOCK_WIDTH / r->buffer_width,
145 (float)MACROBLOCK_HEIGHT / r->buffer_height);
146
147 mv_scale = ureg_imm2f(shader,
148 0.5f / r->buffer_width,
149 0.5f / r->buffer_height);
150
151 ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
152 ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
153 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
154 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
155
156 ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
157 ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
158 eb[0][1], eb[0][0]);
159 ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
160 ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
161 eb[1][1], eb[1][0]);
162
163 ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
164 ureg_scalar(eb[1][1], TGSI_SWIZZLE_W));
165 ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
166 ureg_scalar(eb[1][1], TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.0f));
167 ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z),
168 ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
169
170 ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos));
171 ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos));
172
173 ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
174 ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
175 vmv[0], vmv[1]);
176 ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
177
178 ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
179 ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
180 vmv[2], vmv[3]);
181 ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
182
183 ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
184 ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
185 ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
186
187 ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
188 ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
189 vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
190
191 ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label);
192
193 ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
194 ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
195 ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
196 ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
197 ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
198 ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
199
200 ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X),
201 ureg_scalar(vrect, TGSI_SWIZZLE_Y),
202 ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
203
204 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
205 ureg_ENDIF(shader);
206
207 ureg_release_temporary(shader, t_vtex);
208 ureg_release_temporary(shader, t_vpos);
209 ureg_release_temporary(shader, t_vmv);
210
211 ureg_END(shader);
212
213 return ureg_create_shader_and_destroy(shader, r->pipe);
214 }
215
216 static struct ureg_dst
217 calc_field(struct ureg_program *shader)
218 {
219 struct ureg_dst tmp;
220 struct ureg_src line;
221
222 tmp = ureg_DECL_temporary(shader);
223
224 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
225
226 /*
227 * line.x going from 0 to 1 if not interlaced
228 * line.x going from 0 to 8 in steps of 0.5 if interlaced
229 * line.y going from 0 to 8 in steps of 0.5
230 *
231 * tmp.xy = fraction(line)
232 * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
233 */
234 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line);
235 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
236
237 return tmp;
238 }
239
240 static struct ureg_dst
241 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
242 {
243 struct ureg_src tc[3], sampler[3], eb[2];
244 struct ureg_dst texel, t_tc, t_eb_info;
245 unsigned i, label;
246
247 texel = ureg_DECL_temporary(shader);
248 t_tc = ureg_DECL_temporary(shader);
249 t_eb_info = ureg_DECL_temporary(shader);
250
251 tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
252 tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
253 tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
254
255 eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT);
256 eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT);
257
258 for (i = 0; i < 3; ++i) {
259 sampler[i] = ureg_DECL_sampler(shader, i);
260 }
261
262 /*
263 * texel.y = tex(field.y ? tc[1] : tc[0], sampler[0])
264 * texel.cb = tex(tc[2], sampler[1])
265 * texel.cr = tex(tc[2], sampler[2])
266 */
267
268 ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
269 ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
270 tc[1], tc[0]);
271
272 ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ),
273 ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
274 eb[1], eb[0]);
275
276 /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
277 ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
278
279 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
280 for (i = 0; i < 3; ++i) {
281 ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
282
283 /* Nouveau can't writemask tex dst regs (yet?), so this won't work anymore on nvidia hardware */
284 if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
285 ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
286 } else {
287 ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]);
288 }
289
290 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
291 ureg_ENDIF(shader);
292 }
293
294 ureg_release_temporary(shader, t_tc);
295 ureg_release_temporary(shader, t_eb_info);
296
297 return texel;
298 }
299
300 static struct ureg_dst
301 fetch_ref(struct ureg_program *shader, struct ureg_dst field)
302 {
303 struct ureg_src info;
304 struct ureg_src tc[4], sampler[2];
305 struct ureg_dst ref[2], result;
306 unsigned i, intra_label, bi_label, label;
307
308 info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
309
310 for (i = 0; i < 4; ++i)
311 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
312
313 for (i = 0; i < 2; ++i) {
314 sampler[i] = ureg_DECL_sampler(shader, i + 3);
315 ref[i] = ureg_DECL_temporary(shader);
316 }
317
318 result = ureg_DECL_temporary(shader);
319
320 ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f));
321
322 ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Y), &intra_label);
323 ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
324 ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
325 tc[1], tc[0]);
326
327 ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label);
328
329 /*
330 * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
331 */
332 ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label);
333 ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
334 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
335 ureg_ELSE(shader, &label);
336 ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
337 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
338 ureg_ENDIF(shader);
339
340 ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
341 ureg_ELSE(shader, &bi_label);
342
343 /*
344 * if (field.z)
345 * ref[0..1] = tex(tc[0..1], sampler[0..1])
346 * else
347 * ref[0..1] = tex(tc[2..3], sampler[0..1])
348 */
349 ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
350 ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
351 tc[3], tc[2]);
352 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
353 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
354
355 ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f),
356 ureg_src(ref[0]), ureg_src(ref[1]));
357
358 ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
359 ureg_ENDIF(shader);
360 ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
361 ureg_ENDIF(shader);
362
363 for (i = 0; i < 2; ++i)
364 ureg_release_temporary(shader, ref[i]);
365
366 return result;
367 }
368
369 static void *
370 create_frag_shader(struct vl_mpeg12_mc_renderer *r)
371 {
372 struct ureg_program *shader;
373 struct ureg_dst result;
374 struct ureg_dst field, texel;
375 struct ureg_dst fragment;
376
377 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
378 if (!shader)
379 return NULL;
380
381 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
382
383 field = calc_field(shader);
384 texel = fetch_ycbcr(r, shader, field);
385
386 result = fetch_ref(shader, field);
387
388 ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(texel), ureg_src(result));
389
390 ureg_release_temporary(shader, field);
391 ureg_release_temporary(shader, texel);
392 ureg_release_temporary(shader, result);
393 ureg_END(shader);
394
395 return ureg_create_shader_and_destroy(shader, r->pipe);
396 }
397
398 static bool
399 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
400 {
401 struct pipe_sampler_state sampler;
402 struct pipe_rasterizer_state rs_state;
403 unsigned filters[5];
404 unsigned i;
405
406 assert(r);
407
408 r->viewport.scale[0] = r->buffer_width;
409 r->viewport.scale[1] = r->buffer_height;
410 r->viewport.scale[2] = 1;
411 r->viewport.scale[3] = 1;
412 r->viewport.translate[0] = 0;
413 r->viewport.translate[1] = 0;
414 r->viewport.translate[2] = 0;
415 r->viewport.translate[3] = 0;
416
417 r->fb_state.width = r->buffer_width;
418 r->fb_state.height = r->buffer_height;
419 r->fb_state.nr_cbufs = 1;
420 r->fb_state.zsbuf = NULL;
421
422 /* Luma filter */
423 filters[0] = PIPE_TEX_FILTER_NEAREST;
424 /* Chroma filters */
425 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
426 filters[1] = PIPE_TEX_FILTER_NEAREST;
427 filters[2] = PIPE_TEX_FILTER_NEAREST;
428 }
429 else {
430 filters[1] = PIPE_TEX_FILTER_LINEAR;
431 filters[2] = PIPE_TEX_FILTER_LINEAR;
432 }
433 /* Fwd, bkwd ref filters */
434 filters[3] = PIPE_TEX_FILTER_LINEAR;
435 filters[4] = PIPE_TEX_FILTER_LINEAR;
436
437 for (i = 0; i < 5; ++i) {
438 memset(&sampler, 0, sizeof(sampler));
439 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
440 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
441 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
442 sampler.min_img_filter = filters[i];
443 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
444 sampler.mag_img_filter = filters[i];
445 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
446 sampler.compare_func = PIPE_FUNC_ALWAYS;
447 sampler.normalized_coords = 1;
448 /*sampler.shadow_ambient = ; */
449 /*sampler.lod_bias = ; */
450 sampler.min_lod = 0;
451 /*sampler.max_lod = ; */
452 sampler.border_color[0] = 0.0f;
453 sampler.border_color[1] = 0.0f;
454 sampler.border_color[2] = 0.0f;
455 sampler.border_color[3] = 0.0f;
456 /*sampler.max_anisotropy = ; */
457 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
458 }
459
460 memset(&rs_state, 0, sizeof(rs_state));
461 /*rs_state.sprite_coord_enable */
462 rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
463 rs_state.point_quad_rasterization = true;
464 rs_state.point_size = BLOCK_WIDTH;
465 rs_state.gl_rasterization_rules = true;
466 r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
467
468 return true;
469 }
470
471 static void
472 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
473 {
474 unsigned i;
475
476 assert(r);
477
478 for (i = 0; i < 5; ++i)
479 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
480
481 r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
482 }
483
484 static bool
485 init_buffers(struct vl_mpeg12_mc_renderer *r)
486 {
487 struct pipe_resource *idct_matrix;
488
489 const unsigned mbw =
490 align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
491 const unsigned mbh =
492 align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
493
494 unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
495
496 assert(r);
497
498 r->macroblocks_per_batch =
499 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
500
501 if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
502 return false;
503
504 if (!vl_idct_init(&r->idct_y, r->pipe, r->buffer_width, r->buffer_height,
505 2, 2, TGSI_SWIZZLE_X, idct_matrix))
506 return false;
507
508 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
509 chroma_width = r->buffer_width / 2;
510 chroma_height = r->buffer_height / 2;
511 chroma_blocks_x = 1;
512 chroma_blocks_y = 1;
513 } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
514 chroma_width = r->buffer_width;
515 chroma_height = r->buffer_height / 2;
516 chroma_blocks_x = 2;
517 chroma_blocks_y = 1;
518 } else {
519 chroma_width = r->buffer_width;
520 chroma_height = r->buffer_height;
521 chroma_blocks_x = 2;
522 chroma_blocks_y = 2;
523 }
524
525 if(!vl_idct_init(&r->idct_cr, r->pipe, chroma_width, chroma_height,
526 chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
527 return false;
528
529 if(!vl_idct_init(&r->idct_cb, r->pipe, chroma_width, chroma_height,
530 chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
531 return false;
532
533 r->quad = vl_vb_upload_quads(r->pipe, 1, 1);
534 r->vertex_elems_state = vl_vb_get_elems_state(r->pipe, true);
535
536 if (r->vertex_elems_state == NULL)
537 return false;
538
539 r->vs = create_vert_shader(r);
540 r->fs = create_frag_shader(r);
541
542 if (r->vs == NULL || r->fs == NULL)
543 return false;
544
545 return true;
546 }
547
548 static void
549 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
550 {
551 assert(r);
552
553 r->pipe->delete_vs_state(r->pipe, r->vs);
554 r->pipe->delete_fs_state(r->pipe, r->fs);
555
556 vl_idct_cleanup(&r->idct_y);
557 vl_idct_cleanup(&r->idct_cr);
558 vl_idct_cleanup(&r->idct_cb);
559
560 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
561 }
562
563 static struct pipe_sampler_view
564 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
565 {
566 struct pipe_sampler_view *sampler_view;
567 assert(r);
568 assert(surface);
569
570 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
571 if (!sampler_view) {
572 struct pipe_sampler_view templat;
573 boolean added_to_map;
574
575 u_sampler_view_default_template(&templat, surface->texture,
576 surface->texture->format);
577 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
578 &templat);
579 if (!sampler_view)
580 return NULL;
581
582 added_to_map = util_keymap_insert(r->texview_map, &surface,
583 sampler_view, r->pipe);
584 assert(added_to_map);
585 }
586
587 return sampler_view;
588 }
589
590 static void
591 grab_blocks(struct vl_mpeg12_mc_renderer *r,
592 struct vl_mpeg12_mc_buffer *buffer,
593 unsigned mbx, unsigned mby,
594 unsigned cbp, short *blocks)
595 {
596 unsigned tb = 0;
597 unsigned x, y;
598
599 assert(r);
600 assert(blocks);
601
602 for (y = 0; y < 2; ++y) {
603 for (x = 0; x < 2; ++x, ++tb) {
604 if (cbp & (*r->empty_block_mask)[0][y][x]) {
605 vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
606 blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
607 }
608 }
609 }
610
611 /* TODO: Implement 422, 444 */
612 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
613
614 for (tb = 1; tb < 3; ++tb) {
615 if (cbp & (*r->empty_block_mask)[tb][0][0]) {
616 if(tb == 1)
617 vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks);
618 else
619 vl_idct_add_block(&buffer->idct_cr, mbx, mby, blocks);
620 blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
621 }
622 }
623 }
624
625 static void
626 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
627 struct vl_mpeg12_mc_buffer *buffer,
628 struct pipe_mpeg12_macroblock *mb)
629 {
630 assert(r);
631 assert(mb);
632 assert(mb->blocks);
633
634 vl_vb_add_block(&buffer->vertex_stream, mb, r->empty_block_mask);
635 grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
636 }
637
638 static void
639 texview_map_delete(const struct keymap *map,
640 const void *key, void *data,
641 void *user)
642 {
643 struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
644
645 assert(map);
646 assert(key);
647 assert(data);
648 assert(user);
649
650 pipe_sampler_view_reference(&sv, NULL);
651 }
652
653 bool
654 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
655 struct pipe_context *pipe,
656 unsigned buffer_width,
657 unsigned buffer_height,
658 enum pipe_video_chroma_format chroma_format,
659 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode)
660 {
661 assert(renderer);
662 assert(pipe);
663
664 /* TODO: Implement other policies */
665 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
666
667 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
668
669 renderer->pipe = pipe;
670 renderer->buffer_width = buffer_width;
671 renderer->buffer_height = buffer_height;
672 renderer->chroma_format = chroma_format;
673 renderer->bufmode = bufmode;
674
675 /* TODO: Implement 422, 444 */
676 assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
677 renderer->empty_block_mask = &const_empty_block_mask_420;
678
679 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
680 texview_map_delete);
681 if (!renderer->texview_map)
682 return false;
683
684 if (!init_pipe_state(renderer))
685 goto error_pipe_state;
686
687 if (!init_buffers(renderer))
688 goto error_buffers;
689
690 return true;
691
692 error_buffers:
693 cleanup_pipe_state(renderer);
694
695 error_pipe_state:
696 util_delete_keymap(renderer->texview_map, renderer->pipe);
697 return false;
698 }
699
700 void
701 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
702 {
703 assert(renderer);
704
705 util_delete_keymap(renderer->texview_map, renderer->pipe);
706 cleanup_pipe_state(renderer);
707 cleanup_buffers(renderer);
708 }
709
710 bool
711 vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
712 {
713 struct pipe_resource template;
714 struct pipe_sampler_view sampler_view;
715
716 unsigned i;
717
718 assert(renderer && buffer);
719
720 buffer->surface = NULL;
721 buffer->past = NULL;
722 buffer->future = NULL;
723
724 buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride;
725 buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
726 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
727
728 buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, renderer->pipe,
729 renderer->macroblocks_per_batch);
730
731 memset(&template, 0, sizeof(struct pipe_resource));
732 template.target = PIPE_TEXTURE_2D;
733 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
734 template.format = PIPE_FORMAT_R16_SNORM;
735 template.last_level = 0;
736 template.width0 = renderer->buffer_width;
737 template.height0 = renderer->buffer_height;
738 template.depth0 = 1;
739 template.array_size = 1;
740 template.usage = PIPE_USAGE_STATIC;
741 template.bind = PIPE_BIND_SAMPLER_VIEW;
742 template.flags = 0;
743
744 buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
745
746 if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y,
747 buffer->textures.individual.y,
748 buffer->vertex_bufs.individual.stream))
749 return false;
750
751 if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
752 template.width0 = renderer->buffer_width / 2;
753 template.height0 = renderer->buffer_height / 2;
754 }
755 else if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
756 template.height0 = renderer->buffer_height / 2;
757
758 buffer->textures.individual.cb =
759 renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
760 buffer->textures.individual.cr =
761 renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
762
763 if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb,
764 buffer->textures.individual.cb,
765 buffer->vertex_bufs.individual.stream))
766 return false;
767
768 if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr,
769 buffer->textures.individual.cr,
770 buffer->vertex_bufs.individual.stream))
771 return false;
772
773 for (i = 0; i < 3; ++i) {
774 u_sampler_view_default_template(&sampler_view,
775 buffer->textures.all[i],
776 buffer->textures.all[i]->format);
777 sampler_view.swizzle_r = i == 0 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
778 sampler_view.swizzle_g = i == 1 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
779 sampler_view.swizzle_b = i == 2 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
780 sampler_view.swizzle_a = PIPE_SWIZZLE_ONE;
781 buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
782 renderer->pipe, buffer->textures.all[i], &sampler_view);
783 }
784
785 return true;
786 }
787
788 void
789 vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
790 {
791 unsigned i;
792
793 assert(renderer && buffer);
794
795 for (i = 0; i < 3; ++i) {
796 pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
797 pipe_resource_reference(&buffer->vertex_bufs.all[i].buffer, NULL);
798 pipe_resource_reference(&buffer->textures.all[i], NULL);
799 }
800
801 pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
802 vl_vb_cleanup(&buffer->vertex_stream);
803
804 vl_idct_cleanup_buffer(&renderer->idct_y, &buffer->idct_y);
805 vl_idct_cleanup_buffer(&renderer->idct_cb, &buffer->idct_cb);
806 vl_idct_cleanup_buffer(&renderer->idct_cr, &buffer->idct_cr);
807
808 pipe_surface_reference(&buffer->surface, NULL);
809 pipe_surface_reference(&buffer->past, NULL);
810 pipe_surface_reference(&buffer->future, NULL);
811 }
812
813 void
814 vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
815 {
816 assert(renderer && buffer);
817
818 vl_idct_map_buffers(&renderer->idct_y, &buffer->idct_y);
819 vl_idct_map_buffers(&renderer->idct_cr, &buffer->idct_cr);
820 vl_idct_map_buffers(&renderer->idct_cb, &buffer->idct_cb);
821
822 vl_vb_map(&buffer->vertex_stream, renderer->pipe);
823 }
824
825 void
826 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
827 struct vl_mpeg12_mc_buffer *buffer,
828 struct pipe_surface *surface,
829 struct pipe_surface *past,
830 struct pipe_surface *future,
831 unsigned num_macroblocks,
832 struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
833 struct pipe_fence_handle **fence)
834 {
835 unsigned i;
836
837 assert(renderer && buffer);
838 assert(surface);
839 assert(num_macroblocks);
840 assert(mpeg12_macroblocks);
841
842 if (surface != buffer->surface) {
843 pipe_surface_reference(&buffer->surface, surface);
844 pipe_surface_reference(&buffer->past, past);
845 pipe_surface_reference(&buffer->future, future);
846 buffer->fence = fence;
847 } else {
848 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
849 assert(buffer->past == past);
850 assert(buffer->future == future);
851 }
852
853 for (i = 0; i < num_macroblocks; ++i) {
854 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
855 grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]);
856 }
857 }
858
859 void
860 vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
861 {
862 assert(renderer && buffer);
863
864 vl_idct_unmap_buffers(&renderer->idct_y, &buffer->idct_y);
865 vl_idct_unmap_buffers(&renderer->idct_cr, &buffer->idct_cr);
866 vl_idct_unmap_buffers(&renderer->idct_cb, &buffer->idct_cb);
867
868 vl_vb_unmap(&buffer->vertex_stream, renderer->pipe);
869 }
870
871 void
872 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
873 {
874 unsigned num_not_empty, num_empty;
875 assert(renderer && buffer);
876
877 num_not_empty = buffer->vertex_stream.num_not_empty;
878 num_empty = buffer->vertex_stream.num_empty;
879
880 if (num_not_empty == 0 && num_empty == 0)
881 return;
882
883 vl_idct_flush(&renderer->idct_y, &buffer->idct_y, num_not_empty);
884 vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, num_not_empty);
885 vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, num_not_empty);
886
887 renderer->fb_state.cbufs[0] = buffer->surface;
888 renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
889 renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
890 renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
891 renderer->pipe->set_vertex_buffers(renderer->pipe, 2, buffer->vertex_bufs.all);
892 renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
893
894 if (buffer->past) {
895 buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
896 } else {
897 buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface);
898 }
899
900 if (buffer->future) {
901 buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future);
902 } else {
903 buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface);
904 }
905
906 renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
907 renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
908
909 renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
910 renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
911
912 if (num_not_empty > 0)
913 util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_not_empty);
914
915 if (num_empty > 0)
916 util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
917 buffer->vertex_stream.size - num_empty, num_empty);
918
919 renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
920
921 /* Next time we get this surface it may have new ref frames */
922 pipe_surface_reference(&buffer->surface, NULL);
923 pipe_surface_reference(&buffer->past, NULL);
924 pipe_surface_reference(&buffer->future, NULL);
925
926 vl_vb_restart(&buffer->vertex_stream);
927 }