1 /**************************************************************************
3 * Copyright 2010 Christian König
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include <pipe/p_context.h>
31 #include <pipe/p_screen.h>
32 #include <util/u_inlines.h>
33 #include <util/u_sampler.h>
34 #include <util/u_format.h>
35 #include <tgsi/tgsi_ureg.h>
39 #define BLOCK_HEIGHT 8
40 #define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
41 #define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
43 struct vertex_shader_consts
/*
 * 8x8 coefficient table, one row per line.
 *
 * Row 0 is constant; rows 1..7 alternate in sign with increasing
 * frequency.  const_transpose holds the same values with rows and
 * columns exchanged.
 * NOTE(review): the values look like c(k)/2 * cos((2n+1)k*pi/16), i.e. a
 * DCT basis -- confirm against the code that uploads this table.
 */
const float const_matrix[8][8] = {
   /* row 0 */
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,
      0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f },
   /* row 1 */
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f,
     -0.0975452f, -0.2777850f, -0.4157350f, -0.4903930f },
   /* row 2 */
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f,
     -0.4619400f, -0.1913420f,  0.1913420f,  0.4619400f },
   /* row 3 */
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,
      0.2777850f,  0.4903930f,  0.0975450f, -0.4157350f },
   /* row 4 */
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,
      0.3535530f, -0.3535540f, -0.3535530f,  0.3535530f },
   /* row 5 */
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f,
     -0.4157350f, -0.0975451f,  0.4903930f, -0.2777850f },
   /* row 6 */
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f,
     -0.1913410f,  0.4619400f, -0.4619400f,  0.1913420f },
   /* row 7 */
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,
      0.4903930f, -0.4157350f,  0.2777860f, -0.0975458f }
};
/*
 * Transposed form of const_matrix: const_transpose[r][c] carries the
 * value stored at const_matrix[c][r].  Column 0 is therefore constant.
 */
const float const_transpose[8][8] = {
   /* row 0 */
   {  0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,
      0.3535530f,  0.2777850f,  0.1913420f,  0.0975451f },
   /* row 1 */
   {  0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f,
     -0.3535530f, -0.4903930f, -0.4619400f, -0.2777850f },
   /* row 2 */
   {  0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f,
     -0.3535530f,  0.0975452f,  0.4619400f,  0.4157350f },
   /* row 3 */
   {  0.3535530f,  0.0975451f, -0.4619400f, -0.2777850f,
      0.3535540f,  0.4157350f, -0.1913420f, -0.4903930f },
   /* row 4 */
   {  0.3535530f, -0.0975452f, -0.4619400f,  0.2777850f,
      0.3535530f, -0.4157350f, -0.1913410f,  0.4903930f },
   /* row 5 */
   {  0.3535530f, -0.2777850f, -0.1913420f,  0.4903930f,
     -0.3535540f, -0.0975451f,  0.4619400f, -0.4157350f },
   /* row 6 */
   {  0.3535530f, -0.4157350f,  0.1913420f,  0.0975450f,
     -0.3535530f,  0.4903930f, -0.4619400f,  0.2777860f },
   /* row 7 */
   {  0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,
      0.3535530f, -0.2777850f,  0.1913420f, -0.0975458f }
};
89 create_vert_shader(struct vl_idct
*idct
)
91 struct ureg_program
*shader
;
92 struct ureg_src norm
, bs
;
93 struct ureg_src vrect
, vpos
;
94 struct ureg_dst scale
, t_vpos
;
95 struct ureg_dst o_vpos
, o_block
, o_tex
, o_start
, o_step
;
97 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
101 norm
= ureg_DECL_constant(shader
, 0);
102 bs
= ureg_imm2f(shader
, BLOCK_WIDTH
, BLOCK_HEIGHT
);
104 scale
= ureg_DECL_temporary(shader
);
105 t_vpos
= ureg_DECL_temporary(shader
);
107 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
108 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
110 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
111 o_block
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_BLOCK
);
112 o_tex
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_TEX
);
113 o_start
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_START
);
114 o_step
= ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_STEP
);
117 * scale = norm * mbs;
119 * t_vpos = vpos + vrect
120 * o_vpos.xy = t_vpos * scale
125 * o_start = vpos * scale
129 ureg_MUL(shader
, ureg_writemask(scale
, TGSI_WRITEMASK_XY
), norm
, bs
);
131 ureg_ADD(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
132 ureg_MUL(shader
, ureg_writemask(t_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
), ureg_src(scale
));
133 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
134 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), vpos
);
136 ureg_MOV(shader
, ureg_writemask(o_tex
, TGSI_WRITEMASK_XY
), ureg_src(t_vpos
));
137 ureg_MOV(shader
, ureg_writemask(o_block
, TGSI_WRITEMASK_XY
), vrect
);
138 ureg_MUL(shader
, ureg_writemask(o_start
, TGSI_WRITEMASK_XY
), vpos
, ureg_src(scale
));
139 ureg_MOV(shader
, ureg_writemask(o_step
, TGSI_WRITEMASK_XY
), norm
);
141 ureg_release_temporary(shader
, t_vpos
);
142 ureg_release_temporary(shader
, scale
);
146 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
150 matrix_mul(struct ureg_program
*shader
, struct ureg_dst dst
,
151 struct ureg_src tc
[2], struct ureg_src sampler
[2],
152 struct ureg_src start
[2], struct ureg_src step
[2],
155 struct ureg_dst t_tc
[2], m
[2][2], tmp
[2];
158 for(i
= 0; i
< 2; ++i
) {
159 t_tc
[i
] = ureg_DECL_temporary(shader
);
160 for(j
= 0; j
< 2; ++j
)
161 m
[i
][j
] = ureg_DECL_temporary(shader
);
162 tmp
[i
] = ureg_DECL_temporary(shader
);
167 * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
168 * fragment = tmp[0] + tmp[1]
170 ureg_MOV(shader
, ureg_writemask(t_tc
[0], TGSI_WRITEMASK_X
), start
[0]);
171 ureg_MOV(shader
, ureg_writemask(t_tc
[0], TGSI_WRITEMASK_Y
), tc
[0]);
173 ureg_MOV(shader
, ureg_writemask(t_tc
[1], TGSI_WRITEMASK_X
), tc
[1]);
174 ureg_MOV(shader
, ureg_writemask(t_tc
[1], TGSI_WRITEMASK_Y
), start
[1]);
176 for(i
= 0; i
< 2; ++i
) {
177 for(j
= 0; j
< 4; ++j
) {
178 /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
179 ureg_TEX(shader
, tmp
[0], TGSI_TEXTURE_2D
, ureg_src(t_tc
[0]), sampler
[0]);
180 ureg_MOV(shader
, ureg_writemask(m
[i
][0], TGSI_WRITEMASK_X
<< j
), ureg_scalar(ureg_src(tmp
[0]), TGSI_SWIZZLE_X
));
182 ureg_TEX(shader
, tmp
[1], TGSI_TEXTURE_2D
, ureg_src(t_tc
[1]), sampler
[1]);
183 ureg_MOV(shader
, ureg_writemask(m
[i
][1], TGSI_WRITEMASK_X
<< j
), ureg_scalar(ureg_src(tmp
[1]), TGSI_SWIZZLE_X
));
185 ureg_ADD(shader
, ureg_writemask(t_tc
[0], TGSI_WRITEMASK_X
), ureg_src(t_tc
[0]), step
[0]);
186 ureg_ADD(shader
, ureg_writemask(t_tc
[1], TGSI_WRITEMASK_Y
), ureg_src(t_tc
[1]), step
[1]);
190 ureg_MUL(shader
, m
[i
][0], ureg_src(m
[i
][0]), ureg_scalar(ureg_imm1f(shader
, scale
[0]), TGSI_SWIZZLE_X
));
193 ureg_MUL(shader
, m
[i
][1], ureg_src(m
[i
][1]), ureg_scalar(ureg_imm1f(shader
, scale
[1]), TGSI_SWIZZLE_X
));
196 ureg_DP4(shader
, ureg_writemask(tmp
[0], TGSI_WRITEMASK_X
), ureg_src(m
[0][0]), ureg_src(m
[0][1]));
197 ureg_DP4(shader
, ureg_writemask(tmp
[1], TGSI_WRITEMASK_X
), ureg_src(m
[1][0]), ureg_src(m
[1][1]));
198 ureg_ADD(shader
, ureg_writemask(dst
, TGSI_WRITEMASK_X
), ureg_src(tmp
[0]), ureg_src(tmp
[1]));
200 for(i
= 0; i
< 2; ++i
) {
201 ureg_release_temporary(shader
, t_tc
[i
]);
202 for(j
= 0; j
< 2; ++j
)
203 ureg_release_temporary(shader
, m
[i
][j
]);
204 ureg_release_temporary(shader
, tmp
[i
]);
209 create_transpose_frag_shader(struct vl_idct
*idct
)
211 struct ureg_program
*shader
;
212 struct ureg_src tc
[2], sampler
[2];
213 struct ureg_src start
[2], step
[2];
214 struct ureg_dst fragment
;
217 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
221 tc
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_BLOCK
, TGSI_INTERPOLATE_LINEAR
);
222 tc
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX
, TGSI_INTERPOLATE_LINEAR
);
224 start
[0] = ureg_imm1f(shader
, 0.0f
);
225 start
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_START
, TGSI_INTERPOLATE_CONSTANT
);
227 step
[0] = ureg_imm1f(shader
, 1.0f
/ BLOCK_HEIGHT
);
228 step
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_STEP
, TGSI_INTERPOLATE_CONSTANT
);
230 sampler
[0] = ureg_DECL_sampler(shader
, 0);
231 sampler
[1] = ureg_DECL_sampler(shader
, 1);
234 scale
[1] = SCALE_FACTOR_16_TO_12
;
236 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
238 matrix_mul(shader
, fragment
, tc
, sampler
, start
, step
, scale
);
242 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
246 create_matrix_frag_shader(struct vl_idct
*idct
)
248 struct ureg_program
*shader
;
249 struct ureg_src tc
[2], sampler
[2];
250 struct ureg_src start
[2], step
[2];
251 struct ureg_dst tmp
, fragment
;
254 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
258 tmp
= ureg_DECL_temporary(shader
);
260 tc
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_TEX
, TGSI_INTERPOLATE_LINEAR
);
261 tc
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_BLOCK
, TGSI_INTERPOLATE_LINEAR
);
263 start
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_START
, TGSI_INTERPOLATE_CONSTANT
);
264 start
[1] = ureg_imm1f(shader
, 0.0f
);
266 step
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_STEP
, TGSI_INTERPOLATE_CONSTANT
);
267 step
[1] = ureg_imm1f(shader
, 1.0f
/ BLOCK_WIDTH
);
269 sampler
[0] = ureg_DECL_sampler(shader
, 0);
270 sampler
[1] = ureg_DECL_sampler(shader
, 1);
275 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
277 matrix_mul(shader
, tmp
, tc
, sampler
, start
, step
, scale
);
278 ureg_MUL(shader
, fragment
, ureg_src(tmp
), ureg_scalar(ureg_imm1f(shader
, SCALE_FACTOR_9_TO_16
), TGSI_SWIZZLE_X
));
282 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
286 xfer_buffers_map(struct vl_idct
*idct
)
288 struct pipe_box rect
=
291 idct
->destination
->width0
,
292 idct
->destination
->height0
,
296 idct
->tex_transfer
= idct
->pipe
->get_transfer
298 idct
->pipe
, idct
->destination
,
300 PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD
,
304 idct
->texels
= idct
->pipe
->transfer_map(idct
->pipe
, idct
->tex_transfer
);
308 xfer_buffers_unmap(struct vl_idct
*idct
)
310 idct
->pipe
->transfer_unmap(idct
->pipe
, idct
->tex_transfer
);
311 idct
->pipe
->transfer_destroy(idct
->pipe
, idct
->tex_transfer
);
315 init_shaders(struct vl_idct
*idct
)
319 assert(idct
->vs
= create_vert_shader(idct
));
320 assert(idct
->transpose_fs
= create_transpose_frag_shader(idct
));
321 assert(idct
->matrix_fs
= create_matrix_frag_shader(idct
));
327 cleanup_shaders(struct vl_idct
*idct
)
331 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs
);
332 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->transpose_fs
);
333 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->matrix_fs
);
337 init_buffers(struct vl_idct
*idct
)
339 struct pipe_resource
template;
340 struct pipe_sampler_view sampler_view
;
341 struct pipe_vertex_element vertex_elems
[2];
343 const unsigned max_blocks
=
344 align(idct
->destination
->width0
, BLOCK_WIDTH
) / BLOCK_WIDTH
*
345 align(idct
->destination
->height0
, BLOCK_HEIGHT
) / BLOCK_HEIGHT
*
346 idct
->destination
->depth0
;
350 memset(&template, 0, sizeof(struct pipe_resource
));
351 template.target
= PIPE_TEXTURE_2D
;
352 template.format
= PIPE_FORMAT_R32_FLOAT
;
353 template.last_level
= 0;
355 template.height0
= 8;
357 template.usage
= PIPE_USAGE_IMMUTABLE
;
358 template.bind
= PIPE_BIND_SAMPLER_VIEW
;
361 idct
->textures
.individual
.matrix
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
362 idct
->textures
.individual
.transpose
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
364 template.format
= idct
->destination
->format
;
365 template.width0
= idct
->destination
->width0
;
366 template.height0
= idct
->destination
->height0
;
367 template.depth0
= idct
->destination
->depth0
;
368 template.usage
= PIPE_USAGE_DYNAMIC
;
369 idct
->textures
.individual
.source
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
371 template.format
= PIPE_FORMAT_R32_FLOAT
;
372 template.usage
= PIPE_USAGE_STATIC
;
373 idct
->textures
.individual
.intermediate
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
375 for (i
= 0; i
< 4; ++i
) {
376 u_sampler_view_default_template(&sampler_view
, idct
->textures
.all
[i
], idct
->textures
.all
[i
]->format
);
377 idct
->sampler_views
.all
[i
] = idct
->pipe
->create_sampler_view(idct
->pipe
, idct
->textures
.all
[i
], &sampler_view
);
380 idct
->quad
.stride
= sizeof(struct vertex2f
);
381 idct
->quad
.max_index
= 4 * max_blocks
- 1;
382 idct
->quad
.buffer_offset
= 0;
383 idct
->quad
.buffer
= pipe_buffer_create
386 PIPE_BIND_VERTEX_BUFFER
,
387 sizeof(struct vertex2f
) * 4 * max_blocks
390 idct
->pos
.stride
= sizeof(struct vertex2f
);
391 idct
->pos
.max_index
= 4 * max_blocks
- 1;
392 idct
->pos
.buffer_offset
= 0;
393 idct
->pos
.buffer
= pipe_buffer_create
396 PIPE_BIND_VERTEX_BUFFER
,
397 sizeof(struct vertex2f
) * 4 * max_blocks
401 vertex_elems
[0].src_offset
= 0;
402 vertex_elems
[0].instance_divisor
= 0;
403 vertex_elems
[0].vertex_buffer_index
= 0;
404 vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
407 vertex_elems
[1].src_offset
= 0;
408 vertex_elems
[1].instance_divisor
= 0;
409 vertex_elems
[1].vertex_buffer_index
= 1;
410 vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
412 idct
->vertex_elems_state
= idct
->pipe
->create_vertex_elements_state(idct
->pipe
, 2, vertex_elems
);
414 idct
->vs_const_buf
= pipe_buffer_create
417 PIPE_BIND_CONSTANT_BUFFER
,
418 sizeof(struct vertex_shader_consts
)
425 cleanup_buffers(struct vl_idct
*idct
)
431 pipe_resource_reference(&idct
->vs_const_buf
, NULL
);
433 for (i
= 0; i
< 4; ++i
) {
434 pipe_sampler_view_reference(&idct
->sampler_views
.all
[i
], NULL
);
435 pipe_resource_reference(&idct
->textures
.all
[i
], NULL
);
438 idct
->pipe
->delete_vertex_elements_state(idct
->pipe
, idct
->vertex_elems_state
);
439 pipe_resource_reference(&idct
->quad
.buffer
, NULL
);
440 pipe_resource_reference(&idct
->pos
.buffer
, NULL
);
443 bool vl_idct_init(struct vl_idct
*idct
, struct pipe_context
*pipe
, struct pipe_resource
*dst
)
445 assert(idct
&& pipe
&& dst
);
449 idct
->viewport
.scale
[0] = dst
->width0
;
450 idct
->viewport
.scale
[1] = dst
->height0
;
451 idct
->viewport
.scale
[2] = 1;
452 idct
->viewport
.scale
[3] = 1;
453 idct
->viewport
.translate
[0] = 0;
454 idct
->viewport
.translate
[1] = 0;
455 idct
->viewport
.translate
[2] = 0;
456 idct
->viewport
.translate
[3] = 0;
458 idct
->fb_state
.width
= dst
->width0
;
459 idct
->fb_state
.height
= dst
->height0
;
460 idct
->fb_state
.nr_cbufs
= 1;
461 idct
->fb_state
.zsbuf
= NULL
;
463 pipe_resource_reference(&idct
->destination
, dst
);
465 if(!init_shaders(idct
))
468 if(!init_buffers(idct
)) {
469 cleanup_shaders(idct
);
473 xfer_buffers_map(idct
);
478 void vl_idct_cleanup(struct vl_idct
*idct
)
480 cleanup_shaders(idct
);
481 cleanup_buffers(idct
);
484 void vl_idct_add_block(struct vl_idct
*idct
, unsigned x
, unsigned y
, short *block
)
493 tex_pitch
= idct
->tex_transfer
->stride
/ util_format_get_blocksize(idct
->tex_transfer
->resource
->format
);
494 texels
= idct
->texels
+ y
* tex_pitch
* BLOCK_HEIGHT
+ x
* BLOCK_WIDTH
;
496 for (i
= 0; i
< BLOCK_HEIGHT
; ++i
)
497 memcpy(texels
+ i
* tex_pitch
, block
+ i
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
/*
 * Finish the current batch of blocks: the mapped transfer is unmapped
 * and destroyed, then a fresh transfer is created and mapped so that
 * vl_idct_add_block() can be called again.
 */
void vl_idct_flush(struct vl_idct *idct)
{
   /* Release the transfer that received the blocks added so far. */
   xfer_buffers_unmap(idct);

   /* Open a new one for the next batch. */
   xfer_buffers_map(idct);
}