1 /**************************************************************************
3 * Copyright 2010 Christian König
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "vl_vertex_buffers.h"
30 #include "util/u_draw.h"
32 #include <pipe/p_context.h>
33 #include <pipe/p_screen.h>
34 #include <util/u_inlines.h>
35 #include <util/u_sampler.h>
36 #include <util/u_format.h>
37 #include <tgsi/tgsi_ureg.h>
/* Pixel height of one IDCT block (width is BLOCK_WIDTH, defined above). */
#define BLOCK_HEIGHT 8

/* Conversion factor between the 16 bit intermediate format and 9 bit coefficients. */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/* Scaling applied in the first (matrix) pass; the second pass undoes it twice. */
#define STAGE1_SCALE 4.0f
#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)

/* Number of color buffers the matrix stage renders to simultaneously. */
#define NR_RENDER_TARGETS 4
/*
 * 8x8 DCT basis matrix (rows are the cosine basis vectors).
 * NOTE(review): the literals are kept exactly as found, including the
 * slightly asymmetric values (e.g. 0.353553f vs 0.3535530f, 0.277786f,
 * -0.0975458f) — confirm against the generating formula before "fixing" them.
 */
static const float const_matrix[8][8] = {
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,   0.3535530f },
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f,  -0.4903930f },
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,   0.4619400f },
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f,  -0.4157350f },
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,   0.3535530f },
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f,  -0.2777850f },
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,   0.1913420f },
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f,  -0.0975458f }
};
79 calc_addr(struct ureg_program
*shader
, struct ureg_dst addr
[2],
80 struct ureg_src tc
, struct ureg_src start
, bool right_side
,
81 bool transposed
, float size
)
83 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
84 unsigned sw_start
= right_side
? TGSI_SWIZZLE_Y
: TGSI_SWIZZLE_X
;
86 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
87 unsigned sw_tc
= right_side
? TGSI_SWIZZLE_X
: TGSI_SWIZZLE_Y
;
90 * addr[0..1].(start) = right_side ? start.x : tc.x
91 * addr[0..1].(tc) = right_side ? tc.y : start.y
93 * addr[1].(start) += 1.0f / scale
95 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_start
), ureg_scalar(start
, sw_start
));
96 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_tc
), ureg_scalar(tc
, sw_tc
));
97 ureg_MOV(shader
, ureg_writemask(addr
[0], TGSI_WRITEMASK_Z
), tc
);
99 ureg_ADD(shader
, ureg_writemask(addr
[1], wm_start
), ureg_scalar(start
, sw_start
), ureg_imm1f(shader
, 1.0f
/ size
));
100 ureg_MOV(shader
, ureg_writemask(addr
[1], wm_tc
), ureg_scalar(tc
, sw_tc
));
101 ureg_MOV(shader
, ureg_writemask(addr
[1], TGSI_WRITEMASK_Z
), tc
);
105 create_vert_shader(struct vl_idct
*idct
, bool matrix_stage
)
107 struct ureg_program
*shader
;
108 struct ureg_src scale
;
109 struct ureg_src vrect
, vpos
;
110 struct ureg_dst t_tex
, t_start
;
111 struct ureg_dst o_vpos
, o_l_addr
[2], o_r_addr
[2];
113 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
117 t_tex
= ureg_DECL_temporary(shader
);
118 t_start
= ureg_DECL_temporary(shader
);
120 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
121 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
123 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
125 o_l_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
);
126 o_l_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
);
128 o_r_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
);
129 o_r_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
);
132 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
134 * t_vpos = vpos + vrect
135 * o_vpos.xy = t_vpos * scale
138 * o_l_addr = calc_addr(...)
139 * o_r_addr = calc_addr(...)
142 scale
= ureg_imm2f(shader
,
143 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
144 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
146 ureg_ADD(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
147 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
), scale
);
148 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
));
149 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), vpos
);
151 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_Z
),
152 ureg_scalar(vrect
, TGSI_SWIZZLE_X
),
153 ureg_imm1f(shader
, BLOCK_WIDTH
/ NR_RENDER_TARGETS
));
155 ureg_MUL(shader
, ureg_writemask(t_start
, TGSI_WRITEMASK_XY
), vpos
, scale
);
158 calc_addr(shader
, o_l_addr
, ureg_src(t_tex
), ureg_src(t_start
), false, false, idct
->buffer_width
/ 4);
159 calc_addr(shader
, o_r_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), true, true, BLOCK_WIDTH
/ 4);
161 calc_addr(shader
, o_l_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), false, false, BLOCK_WIDTH
/ 4);
162 calc_addr(shader
, o_r_addr
, ureg_src(t_tex
), ureg_src(t_start
), true, false, idct
->buffer_height
/ 4);
165 ureg_release_temporary(shader
, t_tex
);
166 ureg_release_temporary(shader
, t_start
);
170 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
174 increment_addr(struct ureg_program
*shader
, struct ureg_dst addr
[2],
175 bool right_side
, bool transposed
, float size
)
177 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
180 ureg_ADD(shader
, ureg_writemask(addr
[0], wm_tc
),
181 ureg_src(addr
[0]), ureg_imm1f(shader
, 1.0f
/ size
));
182 ureg_ADD(shader
, ureg_writemask(addr
[1], wm_tc
),
183 ureg_src(addr
[1]), ureg_imm1f(shader
, 1.0f
/ size
));
187 fetch_four(struct ureg_program
*shader
, struct ureg_dst m
[2], struct ureg_src addr
[2], struct ureg_src sampler
)
189 m
[0] = ureg_DECL_temporary(shader
);
190 m
[1] = ureg_DECL_temporary(shader
);
192 ureg_TEX(shader
, m
[0], TGSI_TEXTURE_3D
, addr
[0], sampler
);
193 ureg_TEX(shader
, m
[1], TGSI_TEXTURE_3D
, addr
[1], sampler
);
197 matrix_mul(struct ureg_program
*shader
, struct ureg_dst dst
, struct ureg_dst l
[2], struct ureg_dst r
[2])
201 tmp
= ureg_DECL_temporary(shader
);
204 * tmp.xy = dot4(m[0][0..1], m[1][0..1])
205 * dst = tmp.x + tmp.y
207 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(l
[0]), ureg_src(r
[0]));
208 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(l
[1]), ureg_src(r
[1]));
209 ureg_ADD(shader
, dst
,
210 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
),
211 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
213 ureg_release_temporary(shader
, tmp
);
217 create_matrix_frag_shader(struct vl_idct
*idct
)
219 struct ureg_program
*shader
;
221 struct ureg_src l_addr
[2], r_addr
[2], saddr
[2];
223 struct ureg_dst addr
[2], l
[4][2], r
[2];
224 struct ureg_dst fragment
[NR_RENDER_TARGETS
];
228 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
232 addr
[0] = ureg_DECL_temporary(shader
);
233 addr
[1] = ureg_DECL_temporary(shader
);
235 saddr
[0] = ureg_src(addr
[0]);
236 saddr
[1] = ureg_src(addr
[1]);
238 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
239 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
241 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
242 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
244 for (i
= 0; i
< NR_RENDER_TARGETS
; ++i
)
245 fragment
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, i
);
247 for (i
= 0; i
< 4; ++i
) {
249 ureg_MOV(shader
, addr
[0], l_addr
[0]);
250 ureg_MOV(shader
, addr
[1], l_addr
[1]);
252 increment_addr(shader
, addr
, false, false, idct
->buffer_height
);
254 fetch_four(shader
, l
[i
], saddr
, ureg_DECL_sampler(shader
, 1));
257 for (i
= 0; i
< NR_RENDER_TARGETS
; ++i
) {
259 ureg_MOV(shader
, addr
[0], r_addr
[0]);
260 ureg_MOV(shader
, addr
[1], r_addr
[1]);
262 increment_addr(shader
, addr
, true, true, BLOCK_HEIGHT
);
264 fetch_four(shader
, r
, saddr
, ureg_DECL_sampler(shader
, 0));
266 for (j
= 0; j
< 4; ++j
) {
267 matrix_mul(shader
, ureg_writemask(fragment
[i
], TGSI_WRITEMASK_X
<< j
), l
[j
], r
);
269 ureg_release_temporary(shader
, r
[0]);
270 ureg_release_temporary(shader
, r
[1]);
273 for (i
= 0; i
< 4; ++i
) {
274 ureg_release_temporary(shader
, l
[i
][0]);
275 ureg_release_temporary(shader
, l
[i
][1]);
277 ureg_release_temporary(shader
, addr
[0]);
278 ureg_release_temporary(shader
, addr
[1]);
282 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
286 create_transpose_frag_shader(struct vl_idct
*idct
)
288 struct ureg_program
*shader
;
290 struct ureg_src l_addr
[2], r_addr
[2];
292 struct ureg_dst l
[2], r
[2];
293 struct ureg_dst tmp
, fragment
;
295 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
299 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
300 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
302 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
303 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
305 fetch_four(shader
, l
, l_addr
, ureg_DECL_sampler(shader
, 0));
306 fetch_four(shader
, r
, r_addr
, ureg_DECL_sampler(shader
, 1));
308 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
310 tmp
= ureg_DECL_temporary(shader
);
311 matrix_mul(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), l
, r
);
312 ureg_MUL(shader
, fragment
, ureg_src(tmp
), ureg_imm1f(shader
, STAGE2_SCALE
));
314 ureg_release_temporary(shader
, tmp
);
315 ureg_release_temporary(shader
, l
[0]);
316 ureg_release_temporary(shader
, l
[1]);
317 ureg_release_temporary(shader
, r
[0]);
318 ureg_release_temporary(shader
, r
[1]);
322 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
326 init_shaders(struct vl_idct
*idct
)
328 idct
->matrix_vs
= create_vert_shader(idct
, true);
329 idct
->matrix_fs
= create_matrix_frag_shader(idct
);
331 idct
->transpose_vs
= create_vert_shader(idct
, false);
332 idct
->transpose_fs
= create_transpose_frag_shader(idct
);
335 idct
->matrix_vs
!= NULL
&&
336 idct
->matrix_fs
!= NULL
&&
337 idct
->transpose_vs
!= NULL
&&
338 idct
->transpose_fs
!= NULL
;
342 cleanup_shaders(struct vl_idct
*idct
)
344 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->matrix_vs
);
345 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->matrix_fs
);
346 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->transpose_vs
);
347 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->transpose_fs
);
351 init_state(struct vl_idct
*idct
)
353 struct pipe_vertex_element vertex_elems
[NUM_VS_INPUTS
];
354 struct pipe_sampler_state sampler
;
355 struct pipe_rasterizer_state rs_state
;
360 idct
->quad
= vl_vb_upload_quads(idct
->pipe
, idct
->max_blocks
);
362 if(idct
->quad
.buffer
== NULL
)
365 for (i
= 0; i
< 4; ++i
) {
366 memset(&sampler
, 0, sizeof(sampler
));
367 sampler
.wrap_s
= PIPE_TEX_WRAP_REPEAT
;
368 sampler
.wrap_t
= PIPE_TEX_WRAP_REPEAT
;
369 sampler
.wrap_r
= PIPE_TEX_WRAP_REPEAT
;
370 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
371 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
372 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
373 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
374 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
375 sampler
.normalized_coords
= 1;
376 /*sampler.shadow_ambient = ; */
377 /*sampler.lod_bias = ; */
379 /*sampler.max_lod = ; */
380 /*sampler.border_color[0] = ; */
381 /*sampler.max_anisotropy = ; */
382 idct
->samplers
.all
[i
] = idct
->pipe
->create_sampler_state(idct
->pipe
, &sampler
);
385 memset(&rs_state
, 0, sizeof(rs_state
));
386 /*rs_state.sprite_coord_enable */
387 rs_state
.sprite_coord_mode
= PIPE_SPRITE_COORD_UPPER_LEFT
;
388 rs_state
.point_quad_rasterization
= true;
389 rs_state
.point_size
= BLOCK_WIDTH
;
390 rs_state
.gl_rasterization_rules
= false;
391 idct
->rs_state
= idct
->pipe
->create_rasterizer_state(idct
->pipe
, &rs_state
);
393 vertex_elems
[VS_I_RECT
] = vl_vb_get_quad_vertex_element();
396 vertex_elems
[VS_I_VPOS
].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
398 idct
->vertex_buffer_stride
= vl_vb_element_helper(&vertex_elems
[VS_I_VPOS
], 1, 1);
399 idct
->vertex_elems_state
= idct
->pipe
->create_vertex_elements_state(idct
->pipe
, 2, vertex_elems
);
405 cleanup_state(struct vl_idct
*idct
)
409 for (i
= 0; i
< 4; ++i
)
410 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
.all
[i
]);
412 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
413 idct
->pipe
->delete_vertex_elements_state(idct
->pipe
, idct
->vertex_elems_state
);
417 init_textures(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
419 struct pipe_resource
template;
420 struct pipe_sampler_view sampler_view
;
423 assert(idct
&& buffer
);
425 /* create textures */
426 memset(&template, 0, sizeof(struct pipe_resource
));
427 template.last_level
= 0;
429 template.bind
= PIPE_BIND_SAMPLER_VIEW
;
432 template.target
= PIPE_TEXTURE_2D
;
433 template.format
= PIPE_FORMAT_R16G16B16A16_SNORM
;
434 template.width0
= idct
->buffer_width
/ 4;
435 template.height0
= idct
->buffer_height
;
437 template.usage
= PIPE_USAGE_STREAM
;
438 buffer
->textures
.individual
.source
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
440 template.target
= PIPE_TEXTURE_3D
;
441 template.format
= PIPE_FORMAT_R16G16B16A16_SNORM
;
442 template.width0
= idct
->buffer_width
/ NR_RENDER_TARGETS
;
443 template.height0
= idct
->buffer_height
/ 4;
444 template.depth0
= NR_RENDER_TARGETS
;
445 template.usage
= PIPE_USAGE_STATIC
;
446 buffer
->textures
.individual
.intermediate
= idct
->pipe
->screen
->resource_create(idct
->pipe
->screen
, &template);
448 for (i
= 0; i
< 4; ++i
) {
449 if(buffer
->textures
.all
[i
] == NULL
)
450 return false; /* a texture failed to allocate */
452 u_sampler_view_default_template(&sampler_view
, buffer
->textures
.all
[i
], buffer
->textures
.all
[i
]->format
);
453 buffer
->sampler_views
.all
[i
] = idct
->pipe
->create_sampler_view(idct
->pipe
, buffer
->textures
.all
[i
], &sampler_view
);
460 cleanup_textures(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
464 assert(idct
&& buffer
);
466 for (i
= 0; i
< 4; ++i
) {
467 pipe_sampler_view_reference(&buffer
->sampler_views
.all
[i
], NULL
);
468 pipe_resource_reference(&buffer
->textures
.all
[i
], NULL
);
473 init_vertex_buffers(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
475 assert(idct
&& buffer
);
477 buffer
->vertex_bufs
.individual
.quad
.stride
= idct
->quad
.stride
;
478 buffer
->vertex_bufs
.individual
.quad
.max_index
= idct
->quad
.max_index
;
479 buffer
->vertex_bufs
.individual
.quad
.buffer_offset
= idct
->quad
.buffer_offset
;
480 pipe_resource_reference(&buffer
->vertex_bufs
.individual
.quad
.buffer
, idct
->quad
.buffer
);
482 buffer
->vertex_bufs
.individual
.pos
= vl_vb_init(
483 &buffer
->blocks
, idct
->pipe
, idct
->max_blocks
, 2,
484 idct
->vertex_buffer_stride
);
486 if(buffer
->vertex_bufs
.individual
.pos
.buffer
== NULL
)
493 cleanup_vertex_buffers(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
495 assert(idct
&& buffer
);
497 pipe_resource_reference(&buffer
->vertex_bufs
.individual
.quad
.buffer
, NULL
);
498 pipe_resource_reference(&buffer
->vertex_bufs
.individual
.pos
.buffer
, NULL
);
500 vl_vb_cleanup(&buffer
->blocks
);
503 struct pipe_resource
*
504 vl_idct_upload_matrix(struct pipe_context
*pipe
)
506 struct pipe_resource
template, *matrix
;
507 struct pipe_transfer
*buf_transfer
;
508 unsigned i
, j
, pitch
;
511 struct pipe_box rect
=
519 memset(&template, 0, sizeof(struct pipe_resource
));
520 template.target
= PIPE_TEXTURE_2D
;
521 template.format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
522 template.last_level
= 0;
524 template.height0
= 8;
526 template.usage
= PIPE_USAGE_IMMUTABLE
;
527 template.bind
= PIPE_BIND_SAMPLER_VIEW
;
530 matrix
= pipe
->screen
->resource_create(pipe
->screen
, &template);
533 buf_transfer
= pipe
->get_transfer
536 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD
,
539 pitch
= buf_transfer
->stride
/ sizeof(float);
541 f
= pipe
->transfer_map(pipe
, buf_transfer
);
542 for(i
= 0; i
< BLOCK_HEIGHT
; ++i
)
543 for(j
= 0; j
< BLOCK_WIDTH
; ++j
)
544 // transpose and scale
545 f
[i
* pitch
+ j
] = const_matrix
[j
][i
] * STAGE1_SCALE
;
547 pipe
->transfer_unmap(pipe
, buf_transfer
);
548 pipe
->transfer_destroy(pipe
, buf_transfer
);
553 bool vl_idct_init(struct vl_idct
*idct
, struct pipe_context
*pipe
,
554 unsigned buffer_width
, unsigned buffer_height
,
555 struct pipe_resource
*matrix
)
557 assert(idct
&& pipe
&& matrix
);
560 idct
->buffer_width
= buffer_width
;
561 idct
->buffer_height
= buffer_height
;
562 pipe_resource_reference(&idct
->matrix
, matrix
);
565 align(buffer_width
, BLOCK_WIDTH
) / BLOCK_WIDTH
*
566 align(buffer_height
, BLOCK_HEIGHT
) / BLOCK_HEIGHT
;
568 if(!init_shaders(idct
))
571 if(!init_state(idct
)) {
572 cleanup_shaders(idct
);
580 vl_idct_cleanup(struct vl_idct
*idct
)
582 cleanup_shaders(idct
);
585 pipe_resource_reference(&idct
->matrix
, NULL
);
589 vl_idct_init_buffer(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
, struct pipe_resource
*dst
)
591 struct pipe_surface
template;
599 pipe_resource_reference(&buffer
->textures
.individual
.matrix
, idct
->matrix
);
600 pipe_resource_reference(&buffer
->textures
.individual
.transpose
, idct
->matrix
);
601 pipe_resource_reference(&buffer
->destination
, dst
);
603 if (!init_textures(idct
, buffer
))
606 if (!init_vertex_buffers(idct
, buffer
))
610 buffer
->viewport
[0].scale
[0] = buffer
->textures
.individual
.intermediate
->width0
;
611 buffer
->viewport
[0].scale
[1] = buffer
->textures
.individual
.intermediate
->height0
;
613 buffer
->viewport
[1].scale
[0] = buffer
->destination
->width0
;
614 buffer
->viewport
[1].scale
[1] = buffer
->destination
->height0
;
616 buffer
->fb_state
[0].width
= buffer
->textures
.individual
.intermediate
->width0
;
617 buffer
->fb_state
[0].height
= buffer
->textures
.individual
.intermediate
->height0
;
619 buffer
->fb_state
[0].nr_cbufs
= NR_RENDER_TARGETS
;
620 for(i
= 0; i
< NR_RENDER_TARGETS
; ++i
) {
621 memset(&template, 0, sizeof(template));
622 template.format
= buffer
->textures
.individual
.intermediate
->format
;
623 template.u
.tex
.first_layer
= i
;
624 template.u
.tex
.last_layer
= i
;
625 template.usage
= PIPE_BIND_RENDER_TARGET
;
626 buffer
->fb_state
[0].cbufs
[i
] = idct
->pipe
->create_surface(
627 idct
->pipe
, buffer
->textures
.individual
.intermediate
,
631 buffer
->fb_state
[1].width
= buffer
->destination
->width0
;
632 buffer
->fb_state
[1].height
= buffer
->destination
->height0
;
634 buffer
->fb_state
[1].nr_cbufs
= 1;
636 memset(&template, 0, sizeof(template));
637 template.format
= buffer
->destination
->format
;
638 template.usage
= PIPE_BIND_RENDER_TARGET
;
639 buffer
->fb_state
[1].cbufs
[0] = idct
->pipe
->create_surface(
640 idct
->pipe
, buffer
->destination
, &template);
642 for(i
= 0; i
< 2; ++i
) {
643 buffer
->viewport
[i
].scale
[2] = 1;
644 buffer
->viewport
[i
].scale
[3] = 1;
645 buffer
->viewport
[i
].translate
[0] = 0;
646 buffer
->viewport
[i
].translate
[1] = 0;
647 buffer
->viewport
[i
].translate
[2] = 0;
648 buffer
->viewport
[i
].translate
[3] = 0;
650 buffer
->fb_state
[i
].zsbuf
= NULL
;
657 vl_idct_cleanup_buffer(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
663 for(i
= 0; i
< NR_RENDER_TARGETS
; ++i
) {
664 idct
->pipe
->surface_destroy(idct
->pipe
, buffer
->fb_state
[0].cbufs
[i
]);
667 idct
->pipe
->surface_destroy(idct
->pipe
, buffer
->fb_state
[1].cbufs
[0]);
669 cleanup_textures(idct
, buffer
);
670 cleanup_vertex_buffers(idct
, buffer
);
674 vl_idct_map_buffers(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
678 struct pipe_box rect
=
681 buffer
->textures
.individual
.source
->width0
,
682 buffer
->textures
.individual
.source
->height0
,
686 buffer
->tex_transfer
= idct
->pipe
->get_transfer
688 idct
->pipe
, buffer
->textures
.individual
.source
,
689 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD
,
693 buffer
->texels
= idct
->pipe
->transfer_map(idct
->pipe
, buffer
->tex_transfer
);
695 vl_vb_map(&buffer
->blocks
, idct
->pipe
);
699 vl_idct_add_block(struct vl_idct_buffer
*buffer
, unsigned x
, unsigned y
, short *block
)
709 tex_pitch
= buffer
->tex_transfer
->stride
/ sizeof(short);
710 texels
= buffer
->texels
+ y
* tex_pitch
* BLOCK_HEIGHT
+ x
* BLOCK_WIDTH
;
712 for (i
= 0; i
< BLOCK_HEIGHT
; ++i
)
713 memcpy(texels
+ i
* tex_pitch
, block
+ i
* BLOCK_WIDTH
, BLOCK_WIDTH
* sizeof(short));
717 vl_vb_add_block(&buffer
->blocks
, (float*)&v
);
721 vl_idct_unmap_buffers(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
723 assert(idct
&& buffer
);
725 idct
->pipe
->transfer_unmap(idct
->pipe
, buffer
->tex_transfer
);
726 idct
->pipe
->transfer_destroy(idct
->pipe
, buffer
->tex_transfer
);
727 vl_vb_unmap(&buffer
->blocks
, idct
->pipe
);
731 vl_idct_flush(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
737 num_verts
= vl_vb_restart(&buffer
->blocks
);
741 idct
->pipe
->bind_rasterizer_state(idct
->pipe
, idct
->rs_state
);
742 idct
->pipe
->set_vertex_buffers(idct
->pipe
, 2, buffer
->vertex_bufs
.all
);
743 idct
->pipe
->bind_vertex_elements_state(idct
->pipe
, idct
->vertex_elems_state
);
746 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
[0]);
747 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
[0]);
748 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[0]);
749 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
.stage
[0]);
750 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->matrix_vs
);
751 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->matrix_fs
);
752 util_draw_arrays(idct
->pipe
, PIPE_PRIM_QUADS
, 0, num_verts
);
755 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
[1]);
756 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
[1]);
757 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[1]);
758 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
.stage
[1]);
759 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->transpose_vs
);
760 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->transpose_fs
);
761 util_draw_arrays(idct
->pipe
, PIPE_PRIM_QUADS
, 0, num_verts
);