1 /**************************************************************************
3 * Copyright 2010 Christian König
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "vl_vertex_buffers.h"
30 #include "vl_defines.h"
31 #include "util/u_draw.h"
33 #include <pipe/p_context.h>
34 #include <pipe/p_screen.h>
35 #include <util/u_inlines.h>
36 #include <util/u_sampler.h>
37 #include <util/u_format.h>
38 #include <tgsi/tgsi_ureg.h>
41 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
43 #define NR_RENDER_TARGETS 4
/* 8x8 DCT basis matrix: row i holds the i-th cosine basis vector.
 * vl_idct_upload_matrix() uploads it transposed and scaled for the GPU. */
static const float const_matrix[8][8] = {
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,   0.3535530f },
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f,  -0.4903930f },
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,   0.4619400f },
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f,  -0.4157350f },
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,   0.3535530f },
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f,  -0.2777850f },
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,   0.1913420f },
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f,  -0.0975458f }
};
66 calc_addr(struct ureg_program
*shader
, struct ureg_dst addr
[2],
67 struct ureg_src tc
, struct ureg_src start
, bool right_side
,
68 bool transposed
, float size
)
70 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
71 unsigned sw_start
= right_side
? TGSI_SWIZZLE_Y
: TGSI_SWIZZLE_X
;
73 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
74 unsigned sw_tc
= right_side
? TGSI_SWIZZLE_X
: TGSI_SWIZZLE_Y
;
77 * addr[0..1].(start) = right_side ? start.x : tc.x
78 * addr[0..1].(tc) = right_side ? tc.y : start.y
80 * addr[1].(start) += 1.0f / scale
82 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_start
), ureg_scalar(start
, sw_start
));
83 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_tc
), ureg_scalar(tc
, sw_tc
));
84 ureg_MOV(shader
, ureg_writemask(addr
[0], TGSI_WRITEMASK_Z
), tc
);
86 ureg_ADD(shader
, ureg_writemask(addr
[1], wm_start
), ureg_scalar(start
, sw_start
), ureg_imm1f(shader
, 1.0f
/ size
));
87 ureg_MOV(shader
, ureg_writemask(addr
[1], wm_tc
), ureg_scalar(tc
, sw_tc
));
88 ureg_MOV(shader
, ureg_writemask(addr
[1], TGSI_WRITEMASK_Z
), tc
);
/**
 * Build the vertex shader shared by both IDCT passes.
 *
 * Positions one quad per 8x8 block and computes the left/right operand
 * fetch addresses (o_l_addr / o_r_addr) consumed by the matching fragment
 * shader.  matrix_stage selects which operands the addresses are computed
 * for (matrix multiply pass vs. transpose pass); color_swizzle selects the
 * component of the "empty block" flag inputs that is tested.
 *
 * Returns the compiled shader CSO, or NULL on failure.
 */
static void *
create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos, vblock, eb[4];
   struct ureg_src scale, blocks_xy, t_eb;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
   /* block index travels in the zw components of the rect input */
   vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
   eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
   eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
   eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
    * blocks_xy = (blocks_x, blocks_y)
    *
    * ar = vblock.y * blocks.x + vblock.x
    * if eb[ar].(color_swizzle)
    *    o_vpos = -1          // move the quad off screen
    * else
    *    t_tex = vpos * blocks_xy + vblock
    *    t_start = t_tex * scale
    *    t_tex = t_tex + vrect
    *    o_vpos.xy = t_tex * scale
    *
    *    o_l_addr = calc_addr(...)
    *    o_r_addr = calc_addr(...)
    * endif
    */

   scale = ureg_imm2f(shader,
      (float)BLOCK_WIDTH / idct->buffer_width,
      (float)BLOCK_HEIGHT / idct->buffer_height);

   blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);

   if (idct->blocks_x > 1 || idct->blocks_y > 1) {
      /* more than one block per instance: index the empty-block flags
         with an address register derived from the block position */
      struct ureg_dst ar = ureg_DECL_address(shader);

      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
               ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock);

      ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex));
      t_eb = ureg_src_indirect(eb[0], ureg_src(ar));
   } else {
      t_eb = eb[0];
   }

   ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label);

      /* block flagged empty: discard it by moving the vertex off screen */
      ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ELSE(shader, &label);

      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
      ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

      ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);

      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
      /* z selects the render-target layer of the intermediate 3D texture */
      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
               ureg_scalar(vrect, TGSI_SWIZZLE_X),
               ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));

      ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));

      if (matrix_stage) {
         calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
         calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
      } else {
         calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
         calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
      }

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
204 increment_addr(struct ureg_program
*shader
, struct ureg_dst daddr
[2],
205 struct ureg_src saddr
[2], bool right_side
, bool transposed
,
208 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
209 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
212 * daddr[0..1].(start) = saddr[0..1].(start)
213 * daddr[0..1].(tc) = saddr[0..1].(tc)
216 ureg_MOV(shader
, ureg_writemask(daddr
[0], wm_start
), saddr
[0]);
217 ureg_ADD(shader
, ureg_writemask(daddr
[0], wm_tc
), saddr
[0], ureg_imm1f(shader
, pos
/ size
));
218 ureg_MOV(shader
, ureg_writemask(daddr
[1], wm_start
), saddr
[1]);
219 ureg_ADD(shader
, ureg_writemask(daddr
[1], wm_tc
), saddr
[1], ureg_imm1f(shader
, pos
/ size
));
223 fetch_four(struct ureg_program
*shader
, struct ureg_dst m
[2], struct ureg_src addr
[2], struct ureg_src sampler
)
225 ureg_TEX(shader
, m
[0], TGSI_TEXTURE_3D
, addr
[0], sampler
);
226 ureg_TEX(shader
, m
[1], TGSI_TEXTURE_3D
, addr
[1], sampler
);
230 matrix_mul(struct ureg_program
*shader
, struct ureg_dst dst
, struct ureg_dst l
[2], struct ureg_dst r
[2])
234 tmp
= ureg_DECL_temporary(shader
);
237 * tmp.xy = dot4(m[0][0..1], m[1][0..1])
238 * dst = tmp.x + tmp.y
240 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(l
[0]), ureg_src(r
[0]));
241 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(l
[1]), ureg_src(r
[1]));
242 ureg_ADD(shader
, dst
,
243 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
),
244 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
246 ureg_release_temporary(shader
, tmp
);
/**
 * Build the fragment shader for the first IDCT pass (matrix multiply).
 *
 * For each of the NR_RENDER_TARGETS outputs it multiplies four rows of the
 * left operand (sampler 1) against one column block of the right operand
 * (sampler 0), writing one dot product per color component.
 *
 * Returns the compiled shader CSO, or NULL on failure.
 */
static void *
create_matrix_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[4][2], r[2];
   struct ureg_dst fragment[NR_RENDER_TARGETS];

   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   for (i = 0; i < 4; ++i) {
      l[i][0] = ureg_DECL_temporary(shader);
      l[i][1] = ureg_DECL_temporary(shader);
   }

   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   /* addresses for left rows 1..3; row 0 fetches straight from l_addr */
   for (i = 1; i < 4; ++i) {
      increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
   }

   for (i = 0; i < 4; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
      s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
   }

   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
      if (i > 0)
         increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);

      /* NOTE(review): the initializer below is immediately overwritten by
         the two assignments that follow — presumably redundant; confirm
         before removing */
      struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
      s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
      s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));

      for (j = 0; j < 4; ++j) {
         /* one dot product per color channel of this render target */
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
      }
   }

   for (i = 0; i < 4; ++i) {
      ureg_release_temporary(shader, l[i][0]);
      ureg_release_temporary(shader, l[i][1]);
   }

   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
/**
 * Build the fragment shader for the second IDCT pass (transpose / final
 * multiply).  Fetches one row pair from each operand (samplers 0 and 1)
 * and writes their dot product to the single color output.
 *
 * Returns the compiled shader CSO, or NULL on failure.
 */
static void *
create_transpose_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[2], r[2];
   struct ureg_dst fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   l[0] = ureg_DECL_temporary(shader);
   l[1] = ureg_DECL_temporary(shader);
   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /* destination texture is single channel, so only .x is written */
   matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);

   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
/**
 * Create all four shader CSOs for the two IDCT passes.
 *
 * On failure, previously created shaders are destroyed via the goto chain
 * (each label unwinds the shader created just before the failing step).
 * Returns true on success.
 */
static bool
init_shaders(struct vl_idct *idct, int color_swizzle)
{
   idct->matrix_vs = create_vert_shader(idct, true, color_swizzle);
   if (!idct->matrix_vs)
      goto error_matrix_vs;

   idct->matrix_fs = create_matrix_frag_shader(idct);
   if (!idct->matrix_fs)
      goto error_matrix_fs;

   idct->transpose_vs = create_vert_shader(idct, false, color_swizzle);
   if (!idct->transpose_vs)
      goto error_transpose_vs;

   idct->transpose_fs = create_transpose_frag_shader(idct);
   if (!idct->transpose_fs)
      goto error_transpose_fs;

   return true;

error_transpose_fs:
   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);

error_transpose_vs:
   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);

error_matrix_fs:
   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);

error_matrix_vs:
   return false;
}
396 cleanup_shaders(struct vl_idct
*idct
)
398 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->matrix_vs
);
399 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->matrix_fs
);
400 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->transpose_vs
);
401 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->transpose_fs
);
/**
 * Create the rasterizer state and the four sampler states used for the
 * matrix, source, intermediate and transpose textures.
 *
 * On failure, any sampler states already created and the rasterizer state
 * are destroyed.  Returns true on success.
 */
static bool
init_state(struct vl_idct *idct)
{
   struct pipe_sampler_state sampler;
   struct pipe_rasterizer_state rs_state;
   unsigned i;

   assert(idct);

   memset(&rs_state, 0, sizeof(rs_state));
   /* NOTE(review): non-GL pixel-center convention is selected here —
      confirm this matches what the draw pass expects */
   rs_state.gl_rasterization_rules = false;
   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
   if (!idct->rs_state)
      goto error_rs_state;

   for (i = 0; i < 4; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
      /* nearest filtering: texel values are matrix coefficients, not colors */
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
      if (!idct->samplers.all[i])
         goto error_samplers;
   }

   return true;

error_samplers:
   for (i = 0; i < 4; ++i)
      if (idct->samplers.all[i])
         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);

   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);

error_rs_state:
   return false;
}
449 cleanup_state(struct vl_idct
*idct
)
453 for (i
= 0; i
< 4; ++i
)
454 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
.all
[i
]);
456 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
/**
 * Create the per-buffer textures (source, intermediate, destination) plus
 * one sampler view for each of the four sampled textures.
 *
 * On failure, all sampler views and texture references taken so far are
 * released.  Returns true on success.
 */
static bool
init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource template;
   struct pipe_sampler_view sampler_view;
   unsigned i;

   assert(idct && buffer);

   /* create textures */
   memset(&template, 0, sizeof(struct pipe_resource));
   template.last_level = 0;
   template.bind = PIPE_BIND_SAMPLER_VIEW;

   /* source texture: four 16-bit coefficients packed per texel */
   template.target = PIPE_TEXTURE_2D;
   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
   template.width0 = idct->buffer_width / 4;
   template.height0 = idct->buffer_height;
   template.depth0 = 1;
   template.array_size = 1;
   template.usage = PIPE_USAGE_STREAM;
   buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
   if (!buffer->textures.individual.source)
      goto error;

   /* intermediate texture: 3D, one layer per render target of pass one */
   template.target = PIPE_TEXTURE_3D;
   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
   template.width0 = idct->buffer_width / NR_RENDER_TARGETS;
   template.height0 = idct->buffer_height / 4;
   template.depth0 = NR_RENDER_TARGETS;
   template.usage = PIPE_USAGE_STATIC;
   buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
   if (!buffer->textures.individual.intermediate)
      goto error;

   for (i = 0; i < 4; ++i) {
      memset(&sampler_view, 0, sizeof(sampler_view));
      u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format);
      buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
      if (!buffer->sampler_views.all[i])
         goto error;
   }

   template.target = PIPE_TEXTURE_2D;
   /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
   template.format = PIPE_FORMAT_R16_SNORM;
   template.width0 = idct->buffer_width;
   template.height0 = idct->buffer_height;
   template.depth0 = 1;

   buffer->destination = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
   if (!buffer->destination)
      goto error;

   return true;

error:
   for (i = 0; i < 4; ++i) {
      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
      pipe_resource_reference(&buffer->textures.all[i], NULL);
   }

   return false;
}
/**
 * Release the sampler views and texture references held by the buffer.
 * The last reference dropped frees the underlying resource.
 */
static void
cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(idct && buffer);

   for (i = 0; i < 4; ++i) {
      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
      pipe_resource_reference(&buffer->textures.all[i], NULL);
   }
}
/**
 * Create an immutable 2x8 RGBA float texture holding the DCT matrix,
 * transposed and scaled by sqrt(SCALE_FACTOR_16_TO_9), for use as the
 * shared matrix operand of the IDCT passes.
 *
 * Returns the new resource, or NULL on failure (transfer and resource are
 * unwound via the goto chain).
 */
struct pipe_resource *
vl_idct_upload_matrix(struct pipe_context *pipe)
{
   const float scale = sqrtf(SCALE_FACTOR_16_TO_9);

   struct pipe_resource template, *matrix;
   struct pipe_transfer *buf_transfer;
   unsigned i, j, pitch;
   float *f;

   struct pipe_box rect =
   {
      0, 0, 0,
      BLOCK_WIDTH / 4,
      BLOCK_HEIGHT,
      1
   };

   memset(&template, 0, sizeof(struct pipe_resource));
   template.target = PIPE_TEXTURE_2D;
   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   template.last_level = 0;
   /* two texels per row: 8 floats packed four to a texel */
   template.width0 = 2;
   template.height0 = 8;
   template.depth0 = 1;
   template.array_size = 1;
   template.usage = PIPE_USAGE_IMMUTABLE;
   template.bind = PIPE_BIND_SAMPLER_VIEW;

   matrix = pipe->screen->resource_create(pipe->screen, &template);
   if (!matrix)
      goto error_matrix;

   buf_transfer = pipe->get_transfer
   (
      pipe, matrix,
      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );
   if (!buf_transfer)
      goto error_transfer;

   pitch = buf_transfer->stride / sizeof(float);

   f = pipe->transfer_map(pipe, buf_transfer);
   if (!f)
      goto error_map;

   for(i = 0; i < BLOCK_HEIGHT; ++i)
      for(j = 0; j < BLOCK_WIDTH; ++j)
         // transpose and scale
         f[i * pitch + j] = const_matrix[j][i] * scale;

   pipe->transfer_unmap(pipe, buf_transfer);
   pipe->transfer_destroy(pipe, buf_transfer);

   return matrix;

error_map:
   pipe->transfer_destroy(pipe, buf_transfer);

error_transfer:
   pipe_resource_reference(&matrix, NULL);

error_matrix:
   return NULL;
}
/**
 * Initialize an IDCT context: store the dimensions, take a reference on
 * the shared matrix texture, and create shaders and pipeline state.
 *
 * Returns true on success; on a state failure the shaders created before
 * it are cleaned up.
 */
bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                  unsigned buffer_width, unsigned buffer_height,
                  unsigned blocks_x, unsigned blocks_y,
                  int color_swizzle, struct pipe_resource *matrix)
{
   assert(idct && pipe && matrix);

   idct->pipe = pipe;
   idct->buffer_width = buffer_width;
   idct->buffer_height = buffer_height;
   idct->blocks_x = blocks_x;
   idct->blocks_y = blocks_y;
   pipe_resource_reference(&idct->matrix, matrix);

   if(!init_shaders(idct, color_swizzle))
      return false;

   if(!init_state(idct)) {
      cleanup_shaders(idct);
      return false;
   }

   return true;
}
/**
 * Tear down an IDCT context: shaders, pipeline state, and the matrix
 * texture reference taken in vl_idct_init().
 */
void
vl_idct_cleanup(struct vl_idct *idct)
{
   cleanup_shaders(idct);
   cleanup_state(idct);

   pipe_resource_reference(&idct->matrix, NULL);
}
/**
 * Initialize a per-stream IDCT buffer: texture references, textures,
 * viewports and framebuffer state for both passes.
 *
 * Returns the destination resource on success, NULL on failure (surfaces
 * created so far are released via the goto chain).
 */
struct pipe_resource *
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_surface template;

   unsigned i;

   assert(idct && buffer);

   /* matrix and transpose stages share the same coefficient texture */
   pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix);
   pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);

   if (!init_textures(idct, buffer))
      return NULL;

   /* pass one renders into the intermediate texture */
   buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
   buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0;

   /* pass two renders into the destination texture */
   buffer->viewport[1].scale[0] = buffer->destination->width0;
   buffer->viewport[1].scale[1] = buffer->destination->height0;

   buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0;
   buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0;

   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
      memset(&template, 0, sizeof(template));
      template.format = buffer->textures.individual.intermediate->format;
      /* one surface per layer of the 3D intermediate texture */
      template.u.tex.first_layer = i;
      template.u.tex.last_layer = i;
      /* NOTE(review): a PIPE_BIND_* flag is stored in the usage field here —
         confirm this matches the pipe_surface contract of this Gallium
         version */
      template.usage = PIPE_BIND_RENDER_TARGET;
      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
         idct->pipe, buffer->textures.individual.intermediate,
         &template);

      if (!buffer->fb_state[0].cbufs[i])
         goto error_matrix_surfaces;
   }

   buffer->fb_state[1].width = buffer->destination->width0;
   buffer->fb_state[1].height = buffer->destination->height0;

   buffer->fb_state[1].nr_cbufs = 1;

   memset(&template, 0, sizeof(template));
   template.format = buffer->destination->format;
   template.usage = PIPE_BIND_RENDER_TARGET;
   buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface(
      idct->pipe, buffer->destination, &template);

   if (!buffer->fb_state[1].cbufs[0])
      goto error_transpose_surface;

   for(i = 0; i < 2; ++i) {
      buffer->viewport[i].scale[2] = 1;
      buffer->viewport[i].scale[3] = 1;
      buffer->viewport[i].translate[0] = 0;
      buffer->viewport[i].translate[1] = 0;
      buffer->viewport[i].translate[2] = 0;
      buffer->viewport[i].translate[3] = 0;

      buffer->fb_state[i].zsbuf = NULL;
   }

   return buffer->destination;

error_transpose_surface:
   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);

error_matrix_surfaces:
   for(i = 0; i < NR_RENDER_TARGETS; ++i)
      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);

   return NULL;
}
/**
 * Release a buffer's render target surfaces and textures.
 */
void
vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(idct && buffer);

   for(i = 0; i < NR_RENDER_TARGETS; ++i)
      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);

   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);

   cleanup_textures(idct, buffer);
}
/**
 * Map the whole source coefficient texture for CPU writes; the mapping is
 * stored in buffer->texels and stays valid until vl_idct_unmap_buffers().
 */
void
vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   assert(idct && buffer);

   struct pipe_box rect =
   {
      0, 0, 0,
      buffer->textures.individual.source->width0,
      buffer->textures.individual.source->height0,
      1
   };

   /* DISCARD: previous frame contents are not needed */
   buffer->tex_transfer = idct->pipe->get_transfer
   (
      idct->pipe, buffer->textures.individual.source,
      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );

   buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
}
/**
 * Copy one 8x8 block of short coefficients into the mapped source texture
 * at block position (x, y).  Requires vl_idct_map_buffers() to have been
 * called first.
 */
void
vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
{
   unsigned tex_pitch;
   short *texels;

   unsigned i;

   assert(buffer);
   assert(block);

   /* stride is in bytes; convert to a pitch in shorts */
   tex_pitch = buffer->tex_transfer->stride / sizeof(short);
   texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;

   for (i = 0; i < BLOCK_HEIGHT; ++i)
      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
}
778 vl_idct_unmap_buffers(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
780 assert(idct
&& buffer
);
782 idct
->pipe
->transfer_unmap(idct
->pipe
, buffer
->tex_transfer
);
783 idct
->pipe
->transfer_destroy(idct
->pipe
, buffer
->tex_transfer
);
787 vl_idct_flush(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
, unsigned num_instances
)
794 if(num_instances
> 0) {
795 num_verts
= idct
->blocks_x
* idct
->blocks_y
* 4;
797 idct
->pipe
->bind_rasterizer_state(idct
->pipe
, idct
->rs_state
);
800 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
[0]);
801 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
[0]);
802 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[0]);
803 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
.stage
[0]);
804 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->matrix_vs
);
805 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->matrix_fs
);
806 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_QUADS
, 0, num_verts
, 0, num_instances
);
809 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
[1]);
810 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
[1]);
811 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[1]);
812 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
.stage
[1]);
813 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->transpose_vs
);
814 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->transpose_fs
);
815 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_QUADS
, 0, num_verts
, 0, num_instances
);