Merge branch 'mesa_7_6_branch'
[mesa.git] / src / gallium / auxiliary / vl / vl_compositor.c
1 #include "vl_compositor.h"
2 #include <assert.h>
3 #include <pipe/p_context.h>
4 #include <pipe/p_inlines.h>
5 #include <tgsi/tgsi_parse.h>
6 #include <tgsi/tgsi_build.h>
7 #include <util/u_memory.h>
8 #include "vl_shader_build.h"
9
/* Two-component float vector; used for the quad's positions and texcoords. */
struct vertex2f
{
   float x;
   float y;
};
14
/* Four-component float vector; one shader constant register's worth of data. */
struct vertex4f
{
   float x;
   float y;
   float z;
   float w;
};
19
20 struct vertex_shader_consts
21 {
22 struct vertex4f dst_scale;
23 struct vertex4f dst_trans;
24 struct vertex4f src_scale;
25 struct vertex4f src_trans;
26 };
27
28 struct fragment_shader_consts
29 {
30 struct vertex4f bias;
31 float matrix[16];
32 };
33
34 /*
35 * Represents 2 triangles in a strip in normalized coords.
36 * Used to render the surface onto the frame buffer.
37 */
38 static const struct vertex2f surface_verts[4] =
39 {
40 {0.0f, 0.0f},
41 {0.0f, 1.0f},
42 {1.0f, 0.0f},
43 {1.0f, 1.0f}
44 };
45
46 /*
47 * Represents texcoords for the above. We can use the position values directly.
48 * TODO: Duplicate these in the shader, no need to create a buffer.
49 */
50 static const struct vertex2f *surface_texcoords = surface_verts;
51
52 /*
53 * Identity color conversion constants, for debugging
54 */
55 static const struct fragment_shader_consts identity =
56 {
57 {
58 0.0f, 0.0f, 0.0f, 0.0f
59 },
60 {
61 1.0f, 0.0f, 0.0f, 0.0f,
62 0.0f, 1.0f, 0.0f, 0.0f,
63 0.0f, 0.0f, 1.0f, 0.0f,
64 0.0f, 0.0f, 0.0f, 1.0f
65 }
66 };
67
68 /*
69 * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
70 * Y is in [16,235], Cb and Cr are in [16,240]
71 * R, G, and B are in [16,235]
72 */
73 static const struct fragment_shader_consts bt_601 =
74 {
75 {
76 0.0f, 0.501960784f, 0.501960784f, 0.0f
77 },
78 {
79 1.0f, 0.0f, 1.371f, 0.0f,
80 1.0f, -0.336f, -0.698f, 0.0f,
81 1.0f, 1.732f, 0.0f, 0.0f,
82 0.0f, 0.0f, 0.0f, 1.0f
83 }
84 };
85
86 /*
87 * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
88 * Y is in [16,235], Cb and Cr are in [16,240]
89 * R, G, and B are in [0,255]
90 */
91 static const struct fragment_shader_consts bt_601_full =
92 {
93 {
94 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
95 },
96 {
97 1.164f, 0.0f, 1.596f, 0.0f,
98 1.164f, -0.391f, -0.813f, 0.0f,
99 1.164f, 2.018f, 0.0f, 0.0f,
100 0.0f, 0.0f, 0.0f, 1.0f
101 }
102 };
103
104 /*
105 * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
106 * Y is in [16,235], Cb and Cr are in [16,240]
107 * R, G, and B are in [16,235]
108 */
109 static const struct fragment_shader_consts bt_709 =
110 {
111 {
112 0.0f, 0.501960784f, 0.501960784f, 0.0f
113 },
114 {
115 1.0f, 0.0f, 1.540f, 0.0f,
116 1.0f, -0.183f, -0.459f, 0.0f,
117 1.0f, 1.816f, 0.0f, 0.0f,
118 0.0f, 0.0f, 0.0f, 1.0f
119 }
120 };
121
122 /*
123 * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
124 * Y is in [16,235], Cb and Cr are in [16,240]
125 * R, G, and B are in [0,255]
126 */
127 const struct fragment_shader_consts bt_709_full =
128 {
129 {
130 0.062745098f, 0.501960784f, 0.501960784f, 0.0f
131 },
132 {
133 1.164f, 0.0f, 1.793f, 0.0f,
134 1.164f, -0.213f, -0.534f, 0.0f,
135 1.164f, 2.115f, 0.0f, 0.0f,
136 0.0f, 0.0f, 0.0f, 1.0f
137 }
138 };
139
140 static void
141 create_vert_shader(struct vl_compositor *c)
142 {
143 const unsigned max_tokens = 50;
144
145 struct pipe_shader_state vs;
146 struct tgsi_token *tokens;
147 struct tgsi_header *header;
148
149 struct tgsi_full_declaration decl;
150 struct tgsi_full_instruction inst;
151
152 unsigned ti;
153
154 unsigned i;
155
156 assert(c);
157
158 tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
159 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
160 header = (struct tgsi_header*)&tokens[1];
161 *header = tgsi_build_header();
162 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
163
164 ti = 3;
165
166 /*
167 * decl i0 ; Vertex pos
168 * decl i1 ; Vertex texcoords
169 */
170 for (i = 0; i < 2; i++) {
171 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
172 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
173 }
174
175 /*
176 * decl c0 ; Scaling vector to scale vertex pos rect to destination size
177 * decl c1 ; Translation vector to move vertex pos rect into position
178 * decl c2 ; Scaling vector to scale texcoord rect to source size
179 * decl c3 ; Translation vector to move texcoord rect into position
180 */
181 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
182 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
183
184 /*
185 * decl o0 ; Vertex pos
186 * decl o1 ; Vertex texcoords
187 */
188 for (i = 0; i < 2; i++) {
189 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
190 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
191 }
192
193 /* decl t0, t1 */
194 decl = vl_decl_temps(0, 1);
195 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
196
197 /*
198 * mad o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos
199 * mad o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos
200 */
201 for (i = 0; i < 2; ++i) {
202 inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
203 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
204 }
205
206 /* end */
207 inst = vl_end();
208 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
209
210 assert(ti <= max_tokens);
211
212 vs.tokens = tokens;
213 c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs);
214 FREE(tokens);
215 }
216
217 static void
218 create_frag_shader(struct vl_compositor *c)
219 {
220 const unsigned max_tokens = 50;
221
222 struct pipe_shader_state fs;
223 struct tgsi_token *tokens;
224 struct tgsi_header *header;
225
226 struct tgsi_full_declaration decl;
227 struct tgsi_full_instruction inst;
228
229 unsigned ti;
230
231 unsigned i;
232
233 assert(c);
234
235 tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
236 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
237 header = (struct tgsi_header*)&tokens[1];
238 *header = tgsi_build_header();
239 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
240
241 ti = 3;
242
243 /* decl i0 ; Texcoords for s0 */
244 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
245 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
246
247 /*
248 * decl c0 ; Bias vector for CSC
249 * decl c1-c4 ; CSC matrix c1-c4
250 */
251 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
252 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
253
254 /* decl o0 ; Fragment color */
255 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
256 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
257
258 /* decl t0 */
259 decl = vl_decl_temps(0, 0);
260 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
261
262 /* decl s0 ; Sampler for tex containing picture to display */
263 decl = vl_decl_samplers(0, 0);
264 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
265
266 /* tex2d t0, i0, s0 ; Read src pixel */
267 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
268 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
269
270 /* sub t0, t0, c0 ; Subtract bias vector from pixel */
271 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
272 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
273
274 /*
275 * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix
276 * dp4 o0.y, t0, c2
277 * dp4 o0.z, t0, c3
278 */
279 for (i = 0; i < 3; ++i) {
280 inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
281 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
282 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
283 }
284
285 /* end */
286 inst = vl_end();
287 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
288
289 assert(ti <= max_tokens);
290
291 fs.tokens = tokens;
292 c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs);
293 FREE(tokens);
294 }
295
296 static bool
297 init_pipe_state(struct vl_compositor *c)
298 {
299 struct pipe_sampler_state sampler;
300
301 assert(c);
302
303 c->fb_state.nr_cbufs = 1;
304 c->fb_state.zsbuf = NULL;
305
306 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
307 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
308 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
309 sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
310 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
311 sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
312 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
313 sampler.compare_func = PIPE_FUNC_ALWAYS;
314 sampler.normalized_coords = 1;
315 /*sampler.prefilter = ;*/
316 /*sampler.lod_bias = ;*/
317 /*sampler.min_lod = ;*/
318 /*sampler.max_lod = ;*/
319 /*sampler.border_color[i] = ;*/
320 /*sampler.max_anisotropy = ;*/
321 c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
322
323 return true;
324 }
325
326 static void cleanup_pipe_state(struct vl_compositor *c)
327 {
328 assert(c);
329
330 c->pipe->delete_sampler_state(c->pipe, c->sampler);
331 }
332
333 static bool
334 init_shaders(struct vl_compositor *c)
335 {
336 assert(c);
337
338 create_vert_shader(c);
339 create_frag_shader(c);
340
341 return true;
342 }
343
344 static void cleanup_shaders(struct vl_compositor *c)
345 {
346 assert(c);
347
348 c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
349 c->pipe->delete_fs_state(c->pipe, c->fragment_shader);
350 }
351
352 static bool
353 init_buffers(struct vl_compositor *c)
354 {
355 assert(c);
356
357 /*
358 * Create our vertex buffer and vertex buffer element
359 * VB contains 4 vertices that render a quad covering the entire window
360 * to display a rendered surface
361 * Quad is rendered as a tri strip
362 */
363 c->vertex_bufs[0].stride = sizeof(struct vertex2f);
364 c->vertex_bufs[0].max_index = 3;
365 c->vertex_bufs[0].buffer_offset = 0;
366 c->vertex_bufs[0].buffer = pipe_buffer_create
367 (
368 c->pipe->screen,
369 1,
370 PIPE_BUFFER_USAGE_VERTEX,
371 sizeof(struct vertex2f) * 4
372 );
373
374 memcpy
375 (
376 pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
377 surface_verts,
378 sizeof(struct vertex2f) * 4
379 );
380
381 pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
382
383 c->vertex_elems[0].src_offset = 0;
384 c->vertex_elems[0].vertex_buffer_index = 0;
385 c->vertex_elems[0].nr_components = 2;
386 c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
387
388 /*
389 * Create our texcoord buffer and texcoord buffer element
390 * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
391 */
392 c->vertex_bufs[1].stride = sizeof(struct vertex2f);
393 c->vertex_bufs[1].max_index = 3;
394 c->vertex_bufs[1].buffer_offset = 0;
395 c->vertex_bufs[1].buffer = pipe_buffer_create
396 (
397 c->pipe->screen,
398 1,
399 PIPE_BUFFER_USAGE_VERTEX,
400 sizeof(struct vertex2f) * 4
401 );
402
403 memcpy
404 (
405 pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
406 surface_texcoords,
407 sizeof(struct vertex2f) * 4
408 );
409
410 pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
411
412 c->vertex_elems[1].src_offset = 0;
413 c->vertex_elems[1].vertex_buffer_index = 1;
414 c->vertex_elems[1].nr_components = 2;
415 c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
416
417 /*
418 * Create our vertex shader's constant buffer
419 * Const buffer contains scaling and translation vectors
420 */
421 c->vs_const_buf.buffer = pipe_buffer_create
422 (
423 c->pipe->screen,
424 1,
425 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
426 sizeof(struct vertex_shader_consts)
427 );
428
429 /*
430 * Create our fragment shader's constant buffer
431 * Const buffer contains the color conversion matrix and bias vectors
432 */
433 c->fs_const_buf.buffer = pipe_buffer_create
434 (
435 c->pipe->screen,
436 1,
437 PIPE_BUFFER_USAGE_CONSTANT,
438 sizeof(struct fragment_shader_consts)
439 );
440
441 /*
442 * TODO: Refactor this into a seperate function,
443 * allow changing the CSC matrix at runtime to switch between regular & full versions
444 */
445 memcpy
446 (
447 pipe_buffer_map(c->pipe->screen, c->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
448 &bt_601_full,
449 sizeof(struct fragment_shader_consts)
450 );
451
452 pipe_buffer_unmap(c->pipe->screen, c->fs_const_buf.buffer);
453
454 return true;
455 }
456
457 static void
458 cleanup_buffers(struct vl_compositor *c)
459 {
460 unsigned i;
461
462 assert(c);
463
464 for (i = 0; i < 2; ++i)
465 pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL);
466
467 pipe_buffer_reference(&c->vs_const_buf.buffer, NULL);
468 pipe_buffer_reference(&c->fs_const_buf.buffer, NULL);
469 }
470
471 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
472 {
473 assert(compositor);
474
475 memset(compositor, 0, sizeof(struct vl_compositor));
476
477 compositor->pipe = pipe;
478
479 if (!init_pipe_state(compositor))
480 return false;
481 if (!init_shaders(compositor)) {
482 cleanup_pipe_state(compositor);
483 return false;
484 }
485 if (!init_buffers(compositor)) {
486 cleanup_shaders(compositor);
487 cleanup_pipe_state(compositor);
488 return false;
489 }
490
491 return true;
492 }
493
/*
 * Releases everything vl_compositor_init() set up, in reverse order of
 * creation: buffers first, then shaders, then pipe state.
 */
void
vl_compositor_cleanup(struct vl_compositor *compositor)
{
   assert(compositor);

   cleanup_buffers(compositor);
   cleanup_shaders(compositor);
   cleanup_pipe_state(compositor);
}
502
503 void vl_compositor_render(struct vl_compositor *compositor,
504 /*struct pipe_texture *backround,
505 struct pipe_video_rect *backround_area,*/
506 struct pipe_texture *src_surface,
507 enum pipe_mpeg12_picture_type picture_type,
508 /*unsigned num_past_surfaces,
509 struct pipe_texture *past_surfaces,
510 unsigned num_future_surfaces,
511 struct pipe_texture *future_surfaces,*/
512 struct pipe_video_rect *src_area,
513 struct pipe_texture *dst_surface,
514 struct pipe_video_rect *dst_area,
515 /*unsigned num_layers,
516 struct pipe_texture *layers,
517 struct pipe_video_rect *layer_src_areas,
518 struct pipe_video_rect *layer_dst_areas*/
519 struct pipe_fence_handle **fence)
520 {
521 struct vertex_shader_consts *vs_consts;
522
523 assert(compositor);
524 assert(src_surface);
525 assert(src_area);
526 assert(dst_surface);
527 assert(dst_area);
528 assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
529
530 compositor->fb_state.width = dst_surface->width[0];
531 compositor->fb_state.height = dst_surface->height[0];
532 compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
533 (
534 compositor->pipe->screen,
535 dst_surface,
536 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
537 );
538
539 compositor->viewport.scale[0] = compositor->fb_state.width;
540 compositor->viewport.scale[1] = compositor->fb_state.height;
541 compositor->viewport.scale[2] = 1;
542 compositor->viewport.scale[3] = 1;
543 compositor->viewport.translate[0] = 0;
544 compositor->viewport.translate[1] = 0;
545 compositor->viewport.translate[2] = 0;
546 compositor->viewport.translate[3] = 0;
547
548 compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
549 compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
550 compositor->pipe->bind_sampler_states(compositor->pipe, 1, &compositor->sampler);
551 compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface);
552 compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
553 compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
554 compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
555 compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
556 compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf);
557 compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf);
558
559 vs_consts = pipe_buffer_map
560 (
561 compositor->pipe->screen,
562 compositor->vs_const_buf.buffer,
563 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
564 );
565
566 vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width;
567 vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height;
568 vs_consts->dst_scale.z = 1;
569 vs_consts->dst_scale.w = 1;
570 vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width;
571 vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height;
572 vs_consts->dst_trans.z = 0;
573 vs_consts->dst_trans.w = 0;
574
575 vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
576 vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
577 vs_consts->src_scale.z = 1;
578 vs_consts->src_scale.w = 1;
579 vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
580 vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
581 vs_consts->src_trans.z = 0;
582 vs_consts->src_trans.w = 0;
583
584 pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer);
585
586 compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
587 compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
588
589 pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
590 }