1 /**************************************************************************
3 * Copyright 2010 Christian König
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include <pipe/p_context.h>
31 #include <pipe/p_screen.h>
33 #include <util/u_draw.h>
34 #include <util/u_sampler.h>
36 #include <tgsi/tgsi_ureg.h>
38 #include "vl_defines.h"
40 #include "vl_vertex_buffers.h"
/*
 * const_matrix: 8x8 table of 32-bit words that hold the bit patterns of
 * single-precision floats; the note that follows gives the generating
 * formula.  Row 0 is the constant 0x3eb504f3, roughly 0.35355338f, which is
 * 1/sqrt(8) as the formula states for i == 0.
 * vl_idct_upload_matrix() reads this table through a float-array cast and
 * stores the transposed, scaled values into a texture.
 * NOTE(review): the comment delimiters around the note and the closing
 * brace of the initializer are not visible in this listing.
 */
53 * The DCT matrix stored as hex representation of floats. Equal to the following equation:
54 * for (i = 0; i < 8; ++i)
55 * for (j = 0; j < 8; ++j)
56 * if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
57 * else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
59 static const uint32_t const_matrix
[8][8] = {
60 { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
61 { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
62 { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
63 { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
64 { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
65 { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
66 { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
67 { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
/*
 * calc_addr: emit four instructions that fill the register pair addr[0] and
 * addr[1].
 *
 *   shader      program the instructions are appended to
 *   addr        the two destination registers
 *   tc, start   source registers; one scalar of each is broadcast
 *   right_side  selects the source swizzles (start.y / tc.x when true,
 *               start.x / tc.y when false)
 *   transposed  together with right_side picks the destination components:
 *               when right_side == transposed the "start" component is X and
 *               the "tc" component is Y, otherwise the two are swapped
 *   size        addr[1]'s start component additionally receives 1.0f / size
 *
 * NOTE(review): the inline note below names different source components than
 * the MOV/ADD operands actually use; treat the instructions as authoritative
 * and confirm whether the note is stale.
 * NOTE(review): the return type and the enclosing braces are not visible in
 * this listing.
 */
71 calc_addr(struct ureg_program
*shader
, struct ureg_dst addr
[2],
72 struct ureg_src tc
, struct ureg_src start
, bool right_side
,
73 bool transposed
, float size
)
75 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
76 unsigned sw_start
= right_side
? TGSI_SWIZZLE_Y
: TGSI_SWIZZLE_X
;
78 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
79 unsigned sw_tc
= right_side
? TGSI_SWIZZLE_X
: TGSI_SWIZZLE_Y
;
82 * addr[0..1].(start) = right_side ? start.x : tc.x
83 * addr[0..1].(tc) = right_side ? tc.y : start.y
85 * addr[1].(start) += 1.0f / scale
/* addr[0]: plain copies of the selected start and tc scalars. */
87 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_start
), ureg_scalar(start
, sw_start
));
88 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_tc
), ureg_scalar(tc
, sw_tc
));
/* addr[1]: same as addr[0] but with the start component offset by 1/size. */
90 ureg_ADD(shader
, ureg_writemask(addr
[1], wm_start
), ureg_scalar(start
, sw_start
), ureg_imm1f(shader
, 1.0f
/ size
));
91 ureg_MOV(shader
, ureg_writemask(addr
[1], wm_tc
), ureg_scalar(tc
, sw_tc
));
/*
 * increment_addr: derive the register pair daddr[0..1] from saddr[0..1].
 * For each of the two registers the "start" component is copied unchanged
 * and the "tc" component has the immediate pos / size added to it.
 * Which of X/Y is "start" and which is "tc" follows the same
 * (right_side == transposed) rule used by calc_addr().
 *
 * NOTE(review): `pos` and `size` are used below but their declarations are
 * not visible; the parameter list appears to be cut off after `transposed,`.
 * Whether pos / size is an integer or floating-point division therefore
 * cannot be determined from this listing.
 * NOTE(review): the return type and the enclosing braces are not visible.
 */
95 increment_addr(struct ureg_program
*shader
, struct ureg_dst daddr
[2],
96 struct ureg_src saddr
[2], bool right_side
, bool transposed
,
99 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
100 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
103 * daddr[0..1].(start) = saddr[0..1].(start)
104 * daddr[0..1].(tc) = saddr[0..1].(tc)
/* Register 0: copy start component, offset tc component. */
107 ureg_MOV(shader
, ureg_writemask(daddr
[0], wm_start
), saddr
[0]);
108 ureg_ADD(shader
, ureg_writemask(daddr
[0], wm_tc
), saddr
[0], ureg_imm1f(shader
, pos
/ size
));
/* Register 1: same treatment. */
109 ureg_MOV(shader
, ureg_writemask(daddr
[1], wm_start
), saddr
[1]);
110 ureg_ADD(shader
, ureg_writemask(daddr
[1], wm_tc
), saddr
[1], ureg_imm1f(shader
, pos
/ size
));
/*
 * fetch_four: emit two TEX instructions, m[0] = TEX(addr[0]) and
 * m[1] = TEX(addr[1]), both reading from `sampler`.
 * resource3d selects the texture target: TGSI_TEXTURE_3D when true,
 * TGSI_TEXTURE_2D otherwise.
 * NOTE(review): the return type and the enclosing braces are not visible in
 * this listing.
 */
114 fetch_four(struct ureg_program
*shader
, struct ureg_dst m
[2], struct ureg_src addr
[2],
115 struct ureg_src sampler
, bool resource3d
)
117 ureg_TEX(shader
, m
[0], resource3d
? TGSI_TEXTURE_3D
: TGSI_TEXTURE_2D
, addr
[0], sampler
);
118 ureg_TEX(shader
, m
[1], resource3d
? TGSI_TEXTURE_3D
: TGSI_TEXTURE_2D
, addr
[1], sampler
);
/*
 * matrix_mul: emit the instructions for one 8-term dot product split over
 * two register pairs:
 *     tmp.x = DP4(l[0], r[0])
 *     tmp.y = DP4(l[1], r[1])
 *     dst   = tmp.x + tmp.y
 * A temporary register is allocated for tmp and released again before the
 * function ends.  The caller restricts which components of dst are written
 * by passing an already write-masked destination.
 * NOTE(review): the declaration of `tmp`, the return type and the enclosing
 * braces are not visible in this listing.
 */
122 matrix_mul(struct ureg_program
*shader
, struct ureg_dst dst
, struct ureg_dst l
[2], struct ureg_dst r
[2])
126 tmp
= ureg_DECL_temporary(shader
);
129 * tmp.xy = dot4(m[0][0..1], m[1][0..1])
130 * dst = tmp.x + tmp.y
132 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(l
[0]), ureg_src(r
[0]));
133 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(l
[1]), ureg_src(r
[1]));
134 ureg_ADD(shader
, dst
,
135 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
),
136 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
138 ureg_release_temporary(shader
, tmp
);
/*
 * create_mismatch_vert_shader: build the vertex shader used for the
 * "mismatch control" draw in vl_idct_flush().
 *
 * Inputs are VS_I_RECT and VS_I_VPOS; outputs are the position and the two
 * generic outputs VS_O_L_ADDR0 / VS_O_L_ADDR1.  With
 *     scale = (BLOCK_WIDTH / buffer_width, BLOCK_HEIGHT / buffer_height)
 * the shader computes
 *     o_vpos.xy = vpos * scale + scale
 *     o_vpos.zw = 1.0
 *     t_tex.xy  = vpos * scale
 * and then fills o_addr through calc_addr(), using t_tex for both tc and
 * start.
 *
 * Returns whatever ureg_create_shader_and_destroy() returns for the program.
 *
 * NOTE(review): `vrect` is declared as an input but not otherwise referenced
 * in the visible lines.
 * NOTE(review): `idct->buffer_width / 4` is evaluated before conversion to
 * the float `size` parameter; if buffer_width is an integer (it is assigned
 * from an `unsigned` in vl_idct_init) this is an integer division - confirm
 * that truncation is intended.
 * NOTE(review): no check of the ureg_create() result, the return type and
 * the braces are visible in this listing; lines appear to be missing.
 */
142 create_mismatch_vert_shader(struct vl_idct
*idct
)
144 struct ureg_program
*shader
;
145 struct ureg_src vrect
, vpos
;
146 struct ureg_src scale
;
147 struct ureg_dst t_tex
;
148 struct ureg_dst o_vpos
, o_addr
[2];
150 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
154 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
155 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
157 t_tex
= ureg_DECL_temporary(shader
);
159 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
161 o_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
);
162 o_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
);
165 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
167 * t_vpos = vpos + 7 / BLOCK_WIDTH
168 * o_vpos.xy = t_vpos * scale
170 * o_addr = calc_addr(...)
174 scale
= ureg_imm2f(shader
,
175 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
176 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
/* Position: vpos * scale + scale in xy, constant 1.0 in zw. */
178 ureg_MAD(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), vpos
, scale
, scale
);
179 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
/* Texture address of the block, handed to calc_addr as both tc and start. */
181 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), vpos
, scale
);
182 calc_addr(shader
, o_addr
, ureg_src(t_tex
), ureg_src(t_tex
), false, false, idct
->buffer_width
/ 4);
184 ureg_release_temporary(shader
, t_tex
);
188 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
/*
 * create_mismatch_frag_shader: build the fragment shader paired with
 * create_mismatch_vert_shader() for the "mismatch control" draw.
 *
 * Visible steps, in order:
 *   1. Declare the two interpolated address inputs, one colour output and
 *      an 8x2 array of temporaries m[i][0..1].
 *   2. For i = 0..7 derive m[i] from addr via increment_addr() with
 *      pos = i and size = buffer_height.
 *   3. For i = 0..7 replace m[i] by the texels fetched at those addresses
 *      from sampler 0 (2D target).
 *   4. Accumulate m[1..7] into m[0], then add m[0][1] into m[0][0].
 *   5. Post-process m[0][0] with the DP4 / MUL / FRC / SGT / CMP / MUL
 *      sequence below, using the constants 1 << 14, 0.5 and +-1 / (1 << 15).
 *   6. Write fragment.xyz = m[7][1] and fragment.w = m[0][0] + m[7][1].
 *   7. Release all temporaries and finalize the program.
 *
 * NOTE(review): the arithmetic in step 5 is order-dependent and its intent
 * is not stated in the visible text; presumably it implements the parity
 * adjustment implied by the "mismatch control" label in vl_idct_flush() -
 * TODO confirm against the codec specification before changing it.
 * NOTE(review): the declaration of `i`, the loop closing braces, any check
 * of the ureg_create() result and the return type are not visible here.
 */
192 create_mismatch_frag_shader(struct vl_idct
*idct
)
194 struct ureg_program
*shader
;
196 struct ureg_src addr
[2];
198 struct ureg_dst m
[8][2];
199 struct ureg_dst fragment
;
203 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
207 addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
208 addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
210 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
212 for (i
= 0; i
< 8; ++i
) {
213 m
[i
][0] = ureg_DECL_temporary(shader
);
214 m
[i
][1] = ureg_DECL_temporary(shader
);
217 for (i
= 0; i
< 8; ++i
) {
218 increment_addr(shader
, m
[i
], addr
, false, false, i
, idct
->buffer_height
);
221 for (i
= 0; i
< 8; ++i
) {
222 struct ureg_src s_addr
[2] = { ureg_src(m
[i
][0]), ureg_src(m
[i
][1]) };
223 fetch_four(shader
, m
[i
], s_addr
, ureg_DECL_sampler(shader
, 0), false);
/* Sum rows 1..7 into row 0 (both halves). */
226 for (i
= 1; i
< 8; ++i
) {
227 ureg_ADD(shader
, m
[0][0], ureg_src(m
[0][0]), ureg_src(m
[i
][0]));
228 ureg_ADD(shader
, m
[0][1], ureg_src(m
[0][1]), ureg_src(m
[i
][1]));
231 ureg_ADD(shader
, m
[0][0], ureg_src(m
[0][0]), ureg_src(m
[0][1]));
232 ureg_DP4(shader
, m
[0][0], ureg_abs(ureg_src(m
[0][0])), ureg_imm1f(shader
, 1 << 14));
234 ureg_MUL(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_abs(ureg_src(m
[7][1])), ureg_imm1f(shader
, 1 << 14));
235 ureg_FRC(shader
, m
[0][0], ureg_src(m
[0][0]));
236 ureg_SGT(shader
, m
[0][0], ureg_imm1f(shader
, 0.5f
), ureg_abs(ureg_src(m
[0][0])));
238 ureg_CMP(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_negate(ureg_src(m
[0][0])),
239 ureg_imm1f(shader
, 1.0f
/ (1 << 15)), ureg_imm1f(shader
, -1.0f
/ (1 << 15)));
240 ureg_MUL(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_src(m
[0][0]),
241 ureg_scalar(ureg_src(m
[0][0]), TGSI_SWIZZLE_X
));
/* Output: xyz passes m[7][1] through, w gets the adjusted value. */
243 ureg_MOV(shader
, ureg_writemask(fragment
, TGSI_WRITEMASK_XYZ
), ureg_src(m
[7][1]));
244 ureg_ADD(shader
, ureg_writemask(fragment
, TGSI_WRITEMASK_W
), ureg_src(m
[0][0]), ureg_src(m
[7][1]));
246 for (i
= 0; i
< 8; ++i
) {
247 ureg_release_temporary(shader
, m
[i
][0]);
248 ureg_release_temporary(shader
, m
[i
][1]);
253 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
/*
 * create_stage1_vert_shader: build the vertex shader bound as idct->vs for
 * the second draw in vl_idct_flush().
 *
 * Inputs are VS_I_RECT and VS_I_VPOS; outputs are the position plus the
 * generic outputs VS_O_L_ADDR0/1 and VS_O_R_ADDR0/1.  With
 *     scale = (BLOCK_WIDTH / buffer_width, BLOCK_HEIGHT / buffer_height)
 * the shader computes
 *     t_tex.xy   = (vpos + vrect) * scale
 *     o_vpos.xy  = t_tex.xy,  o_vpos.zw = 1.0
 *     t_start.xy = vpos * scale
 *     o_l_addr   = calc_addr(t_tex, t_start, false, false, buffer_width / 4)
 *     o_r_addr   = calc_addr(vrect, 0.0, true, true, BLOCK_WIDTH / 4)
 *
 * Returns whatever ureg_create_shader_and_destroy() returns for the program.
 *
 * NOTE(review): both `/ 4` size arguments are evaluated before conversion
 * to float; if the operands are integers the division truncates - confirm.
 * NOTE(review): any check of the ureg_create() result, the return type and
 * the braces are not visible in this listing.
 */
257 create_stage1_vert_shader(struct vl_idct
*idct
)
259 struct ureg_program
*shader
;
260 struct ureg_src vrect
, vpos
;
261 struct ureg_src scale
;
262 struct ureg_dst t_tex
, t_start
;
263 struct ureg_dst o_vpos
, o_l_addr
[2], o_r_addr
[2];
265 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
269 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
270 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
272 t_tex
= ureg_DECL_temporary(shader
);
273 t_start
= ureg_DECL_temporary(shader
);
275 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
277 o_l_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
);
278 o_l_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
);
280 o_r_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
);
281 o_r_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
);
284 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
286 * t_vpos = vpos + vrect
287 * o_vpos.xy = t_vpos * scale
290 * o_l_addr = calc_addr(...)
291 * o_r_addr = calc_addr(...)
295 scale
= ureg_imm2f(shader
,
296 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
297 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
/* Scaled corner position of the quad, reused as the output position. */
299 ureg_ADD(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
300 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
), scale
);
302 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
));
303 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
/* Scaled block origin, used as the start operand for the left address. */
305 ureg_MUL(shader
, ureg_writemask(t_start
, TGSI_WRITEMASK_XY
), vpos
, scale
);
307 calc_addr(shader
, o_l_addr
, ureg_src(t_tex
), ureg_src(t_start
), false, false, idct
->buffer_width
/ 4);
308 calc_addr(shader
, o_r_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), true, true, BLOCK_WIDTH
/ 4);
310 ureg_release_temporary(shader
, t_tex
);
311 ureg_release_temporary(shader
, t_start
);
315 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
/*
 * create_stage1_frag_shader: build the fragment shader bound as idct->fs.
 *
 * One colour output is declared per render target
 * (idct->nr_of_render_targets of them; `fragment` is a variable-length
 * array of that size).
 *
 * Visible steps, in order:
 *   1. For i = 0..3 derive l[i] from l_addr via increment_addr() with
 *      pos = i - 2 and size = buffer_height, then replace l[i] by the texels
 *      fetched from sampler 0 (2D target).
 *   2. For every render target i derive r from r_addr with
 *      pos = i - nr_of_render_targets / 2 and size = BLOCK_HEIGHT, fetch it
 *      from sampler 1 (2D target), and for j = 0..3 write
 *      matrix_mul(l[j], r) into component (TGSI_WRITEMASK_X << j) of
 *      fragment[i].
 *   3. Release all temporaries and finalize the program.
 *
 * Returns whatever ureg_create_shader_and_destroy() returns for the program.
 *
 * NOTE(review): `(signed)idct->nr_of_render_targets / 2` casts before the
 * division, so the offset is a signed integer division.
 * NOTE(review): a zero nr_of_render_targets would make `fragment` a
 * zero-length VLA, which is undefined behaviour in C; no guard is visible.
 * NOTE(review): the declarations of `i` and `j`, loop closing braces, any
 * check of the ureg_create() result and the return type are not visible.
 */
319 create_stage1_frag_shader(struct vl_idct
*idct
)
321 struct ureg_program
*shader
;
323 struct ureg_src l_addr
[2], r_addr
[2];
325 struct ureg_dst l
[4][2], r
[2];
326 struct ureg_dst fragment
[idct
->nr_of_render_targets
];
330 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
334 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
335 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
337 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
338 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
340 for (i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
341 fragment
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, i
);
343 for (i
= 0; i
< 4; ++i
) {
344 l
[i
][0] = ureg_DECL_temporary(shader
);
345 l
[i
][1] = ureg_DECL_temporary(shader
);
348 r
[0] = ureg_DECL_temporary(shader
);
349 r
[1] = ureg_DECL_temporary(shader
);
/* Left operand: four rows, addressed at offsets -2..1 relative to l_addr. */
351 for (i
= 0; i
< 4; ++i
) {
352 increment_addr(shader
, l
[i
], l_addr
, false, false, i
- 2, idct
->buffer_height
);
355 for (i
= 0; i
< 4; ++i
) {
356 struct ureg_src s_addr
[2] = { ureg_src(l
[i
][0]), ureg_src(l
[i
][1]) };
357 fetch_four(shader
, l
[i
], s_addr
, ureg_DECL_sampler(shader
, 0), false);
/* Right operand: one fetch per render target, reused for four products. */
360 for (i
= 0; i
< idct
->nr_of_render_targets
; ++i
) {
361 increment_addr(shader
, r
, r_addr
, true, true, i
- (signed)idct
->nr_of_render_targets
/ 2, BLOCK_HEIGHT
);
363 struct ureg_src s_addr
[2] = { ureg_src(r
[0]), ureg_src(r
[1]) };
364 fetch_four(shader
, r
, s_addr
, ureg_DECL_sampler(shader
, 1), false);
366 for (j
= 0; j
< 4; ++j
) {
367 matrix_mul(shader
, ureg_writemask(fragment
[i
], TGSI_WRITEMASK_X
<< j
), l
[j
], r
);
371 for (i
= 0; i
< 4; ++i
) {
372 ureg_release_temporary(shader
, l
[i
][0]);
373 ureg_release_temporary(shader
, l
[i
][1]);
375 ureg_release_temporary(shader
, r
[0]);
376 ureg_release_temporary(shader
, r
[1]);
380 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
/*
 * vl_idct_stage2_vert_shader: append the second-stage address computation
 * to a vertex shader that the caller is building.
 *
 *   idct          supplies buffer_width, buffer_height, nr_of_render_targets
 *   shader        program the declarations and instructions are added to
 *   first_output  offset added to VS_O_L_ADDR0/1 and VS_O_R_ADDR0/1 when the
 *                 generic outputs are declared
 *   tex           caller-owned register; its Z component is written here and
 *                 it is read as the tc operand of the right-hand address
 *
 * Visible computation:
 *     tex.z       = vrect.x * (BLOCK_WIDTH / nr_of_render_targets)
 *     t_start.xy  = vpos * scale
 *     o_l_addr    = calc_addr(vrect, 0.0, false, false, BLOCK_WIDTH / 4)
 *     o_r_addr    = calc_addr(tex, t_start, true, false, buffer_height / 4)
 *     o_r_addr[0..1].z = tex
 *
 * NOTE(review): `BLOCK_WIDTH / idct->nr_of_render_targets` is evaluated
 * before being passed to ureg_imm1f; if both are integers the quotient is
 * truncated, and a zero nr_of_render_targets would divide by zero - confirm.
 * NOTE(review): no release of `t_start` is visible in this listing; the
 * return type and the braces are not visible either.
 */
384 vl_idct_stage2_vert_shader(struct vl_idct
*idct
, struct ureg_program
*shader
,
385 unsigned first_output
, struct ureg_dst tex
)
387 struct ureg_src vrect
, vpos
;
388 struct ureg_src scale
;
389 struct ureg_dst t_start
;
390 struct ureg_dst o_l_addr
[2], o_r_addr
[2];
392 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
393 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
395 t_start
= ureg_DECL_temporary(shader
);
399 o_l_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_L_ADDR0
);
400 o_l_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_L_ADDR1
);
402 o_r_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_R_ADDR0
);
403 o_r_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_R_ADDR1
);
405 scale
= ureg_imm2f(shader
,
406 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
407 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
409 ureg_MUL(shader
, ureg_writemask(tex
, TGSI_WRITEMASK_Z
),
410 ureg_scalar(vrect
, TGSI_SWIZZLE_X
),
411 ureg_imm1f(shader
, BLOCK_WIDTH
/ idct
->nr_of_render_targets
));
412 ureg_MUL(shader
, ureg_writemask(t_start
, TGSI_WRITEMASK_XY
), vpos
, scale
);
414 calc_addr(shader
, o_l_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), false, false, BLOCK_WIDTH
/ 4);
415 calc_addr(shader
, o_r_addr
, ureg_src(tex
), ureg_src(t_start
), true, false, idct
->buffer_height
/ 4);
/* Forward tex into the Z component of both right-hand addresses. */
417 ureg_MOV(shader
, ureg_writemask(o_r_addr
[0], TGSI_WRITEMASK_Z
), ureg_src(tex
));
418 ureg_MOV(shader
, ureg_writemask(o_r_addr
[1], TGSI_WRITEMASK_Z
), ureg_src(tex
));
/*
 * vl_idct_stage2_frag_shader: append the second-stage matrix multiply to a
 * fragment shader that the caller is building.
 *
 *   idct         not referenced in the visible lines
 *   shader       program the declarations and instructions are added to
 *   first_input  offset added to VS_O_L_ADDR0/1 and VS_O_R_ADDR0/1 when the
 *                generic inputs are declared
 *   fragment     destination register that receives the product
 *
 * The left operand is fetched from sampler 1 as a 2D texture, the right
 * operand from sampler 0 as a 3D texture; matrix_mul() then writes their dot
 * product into `fragment`.  All four temporaries are released afterwards.
 *
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
422 vl_idct_stage2_frag_shader(struct vl_idct
*idct
, struct ureg_program
*shader
,
423 unsigned first_input
, struct ureg_dst fragment
)
425 struct ureg_src l_addr
[2], r_addr
[2];
427 struct ureg_dst l
[2], r
[2];
431 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
432 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
434 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
435 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
437 l
[0] = ureg_DECL_temporary(shader
);
438 l
[1] = ureg_DECL_temporary(shader
);
439 r
[0] = ureg_DECL_temporary(shader
);
440 r
[1] = ureg_DECL_temporary(shader
);
442 fetch_four(shader
, l
, l_addr
, ureg_DECL_sampler(shader
, 1), false);
443 fetch_four(shader
, r
, r_addr
, ureg_DECL_sampler(shader
, 0), true);
445 matrix_mul(shader
, fragment
, l
, r
);
447 ureg_release_temporary(shader
, l
[0]);
448 ureg_release_temporary(shader
, l
[1]);
449 ureg_release_temporary(shader
, r
[0]);
450 ureg_release_temporary(shader
, r
[1]);
454 init_shaders(struct vl_idct
*idct
)
456 idct
->vs_mismatch
= create_mismatch_vert_shader(idct
);
457 if (!idct
->vs_mismatch
)
458 goto error_vs_mismatch
;
460 idct
->fs_mismatch
= create_mismatch_frag_shader(idct
);
461 if (!idct
->fs_mismatch
)
462 goto error_fs_mismatch
;
464 idct
->vs
= create_stage1_vert_shader(idct
);
468 idct
->fs
= create_stage1_frag_shader(idct
);
475 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs
);
478 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs_mismatch
);
481 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->fs
);
/*
 * cleanup_shaders: delete the four shader objects held by idct.
 * The two vertex shaders (vs_mismatch, vs) go through delete_vs_state and
 * the two fragment shaders (fs_mismatch, fs) through delete_fs_state.
 * The pointers are not reset afterwards.
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
488 cleanup_shaders(struct vl_idct
*idct
)
490 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs_mismatch
);
491 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->fs_mismatch
);
492 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs
);
493 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->fs
);
/*
 * init_state: create the fixed-function state objects used by the IDCT and
 * store them in idct:
 *   - rs_state: rasterizer state, zeroed except point_size = 1 and
 *     gl_rasterization_rules = true
 *   - blend: blend state with blending and logic ops disabled and a full
 *     RGBA colour mask on render target 0 (the ADD / ONE factors are set but
 *     blend_enable is 0)
 *   - samplers[0..1]: two identical sampler states with REPEAT wrapping,
 *     NEAREST filtering, no mip filtering, no compare, normalized coords
 *
 * The trailing statements delete the samplers, the rasterizer state and the
 * blend state; they form the failure path.
 *
 * NOTE(review): the goto statements, error labels, return statements, the
 * declaration of `i` and the return type are not visible in this listing,
 * so the exact entry points into the failure path cannot be confirmed.
 * NOTE(review): creation order is rasterizer, blend, samplers, but the
 * visible unwind deletes the rasterizer state before the blend state.  If a
 * label sits between those two deletes, a blend-creation failure would skip
 * releasing the rasterizer state - verify against the complete source.
 * NOTE(review): the unwind loop tests idct->samplers[i] for both slots; if
 * it is reached before slot 1 was assigned, that slot is only safe to test
 * when idct was zero-initialized by the caller - TODO confirm.
 */
497 init_state(struct vl_idct
*idct
)
499 struct pipe_blend_state blend
;
500 struct pipe_rasterizer_state rs_state
;
501 struct pipe_sampler_state sampler
;
506 memset(&rs_state
, 0, sizeof(rs_state
));
507 rs_state
.point_size
= 1;
508 rs_state
.gl_rasterization_rules
= true;
509 idct
->rs_state
= idct
->pipe
->create_rasterizer_state(idct
->pipe
, &rs_state
);
513 memset(&blend
, 0, sizeof blend
);
515 blend
.independent_blend_enable
= 0;
516 blend
.rt
[0].blend_enable
= 0;
517 blend
.rt
[0].rgb_func
= PIPE_BLEND_ADD
;
518 blend
.rt
[0].rgb_src_factor
= PIPE_BLENDFACTOR_ONE
;
519 blend
.rt
[0].rgb_dst_factor
= PIPE_BLENDFACTOR_ONE
;
520 blend
.rt
[0].alpha_func
= PIPE_BLEND_ADD
;
521 blend
.rt
[0].alpha_src_factor
= PIPE_BLENDFACTOR_ONE
;
522 blend
.rt
[0].alpha_dst_factor
= PIPE_BLENDFACTOR_ONE
;
523 blend
.logicop_enable
= 0;
524 blend
.logicop_func
= PIPE_LOGICOP_CLEAR
;
525 /* Needed to allow color writes to FB, even if blending disabled */
526 blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
528 idct
->blend
= idct
->pipe
->create_blend_state(idct
->pipe
, &blend
);
/* Two identical point-sampling sampler states. */
532 for (i
= 0; i
< 2; ++i
) {
533 memset(&sampler
, 0, sizeof(sampler
));
534 sampler
.wrap_s
= PIPE_TEX_WRAP_REPEAT
;
535 sampler
.wrap_t
= PIPE_TEX_WRAP_REPEAT
;
536 sampler
.wrap_r
= PIPE_TEX_WRAP_REPEAT
;
537 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
538 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
539 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
540 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
541 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
542 sampler
.normalized_coords
= 1;
543 idct
->samplers
[i
] = idct
->pipe
->create_sampler_state(idct
->pipe
, &sampler
);
544 if (!idct
->samplers
[i
])
/* Failure path: release whatever was created. */
551 for (i
= 0; i
< 2; ++i
)
552 if (idct
->samplers
[i
])
553 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
[i
]);
555 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
558 idct
->pipe
->delete_blend_state(idct
->pipe
, idct
->blend
);
/*
 * cleanup_state: delete the state objects created by init_state():
 * both sampler states, then the rasterizer state, then the blend state.
 * The sampler slots are deleted unconditionally (no NULL test), and the
 * pointers are not reset afterwards.
 * NOTE(review): the declaration of `i`, the return type and the braces are
 * not visible in this listing.
 */
565 cleanup_state(struct vl_idct
*idct
)
569 for (i
= 0; i
< 2; ++i
)
570 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
[i
]);
572 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
573 idct
->pipe
->delete_blend_state(idct
->pipe
, idct
->blend
);
/*
 * init_source: set up the framebuffer and viewport used for the mismatch
 * pass of `buffer`, rendering into the texture behind the buffer's "source"
 * sampler view.
 *
 * fb_state_mismatch gets the texture's width0/height0 and a single colour
 * buffer: a surface created on layer 0 of the texture with the texture's
 * own format and SAMPLER_VIEW | RENDER_TARGET usage.  viewport_mismatch is
 * scaled to (width0, height0, 1, 1).
 *
 * NOTE(review): no check of the create_surface() result is visible here,
 * although vl_idct_init_buffer() tests this function's return value -
 * confirm whether a failed surface creation is reported.
 * NOTE(review): the return statement, return type and braces are not
 * visible in this listing.
 */
577 init_source(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
579 struct pipe_resource
*tex
;
580 struct pipe_surface surf_templ
;
582 assert(idct
&& buffer
);
584 tex
= buffer
->sampler_views
.individual
.source
->texture
;
586 buffer
->fb_state_mismatch
.width
= tex
->width0
;
587 buffer
->fb_state_mismatch
.height
= tex
->height0
;
588 buffer
->fb_state_mismatch
.nr_cbufs
= 1;
590 memset(&surf_templ
, 0, sizeof(surf_templ
));
591 surf_templ
.format
= tex
->format
;
592 surf_templ
.u
.tex
.first_layer
= 0;
593 surf_templ
.u
.tex
.last_layer
= 0;
594 surf_templ
.usage
= PIPE_BIND_SAMPLER_VIEW
| PIPE_BIND_RENDER_TARGET
;
595 buffer
->fb_state_mismatch
.cbufs
[0] = idct
->pipe
->create_surface(idct
->pipe
, tex
, &surf_templ
);
597 buffer
->viewport_mismatch
.scale
[0] = tex
->width0
;
598 buffer
->viewport_mismatch
.scale
[1] = tex
->height0
;
599 buffer
->viewport_mismatch
.scale
[2] = 1;
600 buffer
->viewport_mismatch
.scale
[3] = 1;
/*
 * cleanup_source: undo init_source() for `buffer`.  Drops the reference on
 * the mismatch framebuffer's colour surface and on the buffer's "source"
 * sampler view by re-pointing both to NULL.
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
606 cleanup_source(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
608 assert(idct
&& buffer
);
610 pipe_surface_reference(&buffer
->fb_state_mismatch
.cbufs
[0], NULL
);
612 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.source
, NULL
);
/*
 * init_intermediate: set up the framebuffer and viewport used for the
 * stage-1 pass of `buffer`, rendering into the texture behind the buffer's
 * "intermediate" sampler view.
 *
 * fb_state gets the texture's width0/height0 and
 * idct->nr_of_render_targets colour buffers; colour buffer i is a surface
 * created on layer i of the texture (first_layer == last_layer == i) with
 * the texture's format.  The viewport is scaled to (width0, height0, 1, 1).
 *
 * The trailing loop drops the reference on every colour buffer slot; it is
 * the failure path taken when a surface could not be created.
 *
 * NOTE(review): the statement guarded by `if (!buffer->fb_state.cbufs[i])`,
 * the error label, the return statements, the declaration of `i`, the
 * return type and the braces are not visible in this listing.
 * NOTE(review): the unwind loop visits all nr_of_render_targets slots,
 * including ones not yet assigned; that is only safe if `buffer` was zeroed
 * beforehand (vl_idct_init_buffer() does memset it before calling here).
 */
616 init_intermediate(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
618 struct pipe_resource
*tex
;
619 struct pipe_surface surf_templ
;
622 assert(idct
&& buffer
);
624 tex
= buffer
->sampler_views
.individual
.intermediate
->texture
;
626 buffer
->fb_state
.width
= tex
->width0
;
627 buffer
->fb_state
.height
= tex
->height0
;
628 buffer
->fb_state
.nr_cbufs
= idct
->nr_of_render_targets
;
629 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
) {
630 memset(&surf_templ
, 0, sizeof(surf_templ
));
631 surf_templ
.format
= tex
->format
;
632 surf_templ
.u
.tex
.first_layer
= i
;
633 surf_templ
.u
.tex
.last_layer
= i
;
634 surf_templ
.usage
= PIPE_BIND_SAMPLER_VIEW
| PIPE_BIND_RENDER_TARGET
;
635 buffer
->fb_state
.cbufs
[i
] = idct
->pipe
->create_surface(
636 idct
->pipe
, tex
, &surf_templ
);
638 if (!buffer
->fb_state
.cbufs
[i
])
642 buffer
->viewport
.scale
[0] = tex
->width0
;
643 buffer
->viewport
.scale
[1] = tex
->height0
;
644 buffer
->viewport
.scale
[2] = 1;
645 buffer
->viewport
.scale
[3] = 1;
/* Failure path: release every colour buffer slot. */
650 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
651 pipe_surface_reference(&buffer
->fb_state
.cbufs
[i
], NULL
);
/*
 * cleanup_intermediate: undo init_intermediate() for `buffer`.  Drops the
 * reference on each of the idct->nr_of_render_targets colour surfaces and
 * on the buffer's "intermediate" sampler view by re-pointing them to NULL.
 * NOTE(review): the declaration of `i`, the return type and the braces are
 * not visible in this listing.
 */
657 cleanup_intermediate(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
661 assert(idct
&& buffer
);
663 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
664 pipe_surface_reference(&buffer
->fb_state
.cbufs
[i
], NULL
);
666 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.intermediate
, NULL
);
/*
 * vl_idct_upload_matrix: create a texture holding the scaled, transposed
 * const_matrix and return a sampler view on it.
 *
 *   pipe   context used to create the resource, transfer and sampler view
 *   scale  factor every matrix element is multiplied by
 *
 * The texture is an immutable 2D R32G32B32A32_FLOAT resource of 2 x 8
 * texels, i.e. eight floats per row.  It is written through a mapped
 * transfer: element (row i, float j) receives const_matrix[j][i] * scale,
 * with `pitch` being the transfer stride expressed in floats.  After the
 * sampler view is created the local reference on the resource is dropped.
 *
 * The last two statements (transfer_destroy and the resource unreference)
 * belong to the failure path.
 *
 * NOTE(review): large parts of this function are not visible in this
 * listing - the initializer of `rect`, the declaration of `f`, the argument
 * list of get_transfer, every NULL check, the error labels and the return
 * statements.  The ownership of the returned view cannot be confirmed here.
 * NOTE(review): the loops run to BLOCK_HEIGHT / BLOCK_WIDTH while the table
 * and the cast use a bound of 8; presumably both macros are 8 - confirm.
 * NOTE(review): const_matrix is declared as uint32_t but read through a
 * `const float (*)[8]` cast; accessing an object through an incompatible
 * pointer type violates C's effective-type rules - consider memcpy or a
 * union when this is next touched.
 */
669 struct pipe_sampler_view
*
670 vl_idct_upload_matrix(struct pipe_context
*pipe
, float scale
)
672 struct pipe_resource tex_templ
, *matrix
;
673 struct pipe_sampler_view sv_templ
, *sv
;
674 struct pipe_transfer
*buf_transfer
;
675 unsigned i
, j
, pitch
;
678 struct pipe_box rect
=
688 memset(&tex_templ
, 0, sizeof(tex_templ
));
689 tex_templ
.target
= PIPE_TEXTURE_2D
;
690 tex_templ
.format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
691 tex_templ
.last_level
= 0;
692 tex_templ
.width0
= 2;
693 tex_templ
.height0
= 8;
694 tex_templ
.depth0
= 1;
695 tex_templ
.array_size
= 1;
696 tex_templ
.usage
= PIPE_USAGE_IMMUTABLE
;
697 tex_templ
.bind
= PIPE_BIND_SAMPLER_VIEW
;
700 matrix
= pipe
->screen
->resource_create(pipe
->screen
, &tex_templ
);
704 buf_transfer
= pipe
->get_transfer
707 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD
,
713 pitch
= buf_transfer
->stride
/ sizeof(float);
715 f
= pipe
->transfer_map(pipe
, buf_transfer
);
719 for(i
= 0; i
< BLOCK_HEIGHT
; ++i
)
720 for(j
= 0; j
< BLOCK_WIDTH
; ++j
)
721 // transpose and scale
722 f
[i
* pitch
+ j
] = ((const float (*)[8])const_matrix
)[j
][i
] * scale
;
724 pipe
->transfer_unmap(pipe
, buf_transfer
);
725 pipe
->transfer_destroy(pipe
, buf_transfer
);
727 memset(&sv_templ
, 0, sizeof(sv_templ
));
728 u_sampler_view_default_template(&sv_templ
, matrix
, matrix
->format
);
729 sv
= pipe
->create_sampler_view(pipe
, matrix
, &sv_templ
);
/* The sampler view keeps the texture alive; drop the local reference. */
730 pipe_resource_reference(&matrix
, NULL
);
/* Failure path. */
737 pipe
->transfer_destroy(pipe
, buf_transfer
);
740 pipe_resource_reference(&matrix
, NULL
);
/*
 * vl_idct_init: initialise an IDCT object.
 *
 *   idct                  object to fill in; must not be NULL
 *   pipe                  context; must not be NULL
 *   buffer_width/height   stored in idct and used by the shader builders
 *   nr_of_render_targets  stored in idct; number of colour outputs/surfaces
 *   matrix, transpose     sampler views; must not be NULL.  A reference to
 *                         each is taken and stored in idct.
 *
 * After storing the parameters the shaders (init_shaders) and the state
 * objects (init_state) are created; if init_state fails the shaders are
 * cleaned up again.  Returns bool.
 *
 * NOTE(review): no assignment to idct->pipe is visible in this listing even
 * though init_shaders()/init_state() dereference it; the line is presumably
 * among those missing - confirm.
 * NOTE(review): pipe_sampler_view_reference() is applied to idct->matrix and
 * idct->transpose before anything visible initialises them; this is only
 * safe if the caller passes a zeroed idct - TODO confirm.
 * NOTE(review): on the two failure branches no release of the matrix and
 * transpose references is visible; the return statements are not visible
 * either.
 */
746 bool vl_idct_init(struct vl_idct
*idct
, struct pipe_context
*pipe
,
747 unsigned buffer_width
, unsigned buffer_height
,
748 unsigned nr_of_render_targets
,
749 struct pipe_sampler_view
*matrix
,
750 struct pipe_sampler_view
*transpose
)
752 assert(idct
&& pipe
);
753 assert(matrix
&& transpose
);
756 idct
->buffer_width
= buffer_width
;
757 idct
->buffer_height
= buffer_height
;
758 idct
->nr_of_render_targets
= nr_of_render_targets
;
760 pipe_sampler_view_reference(&idct
->matrix
, matrix
);
761 pipe_sampler_view_reference(&idct
->transpose
, transpose
);
763 if(!init_shaders(idct
))
766 if(!init_state(idct
)) {
767 cleanup_shaders(idct
);
/*
 * vl_idct_cleanup: release what vl_idct_init() acquired.  The visible
 * statements delete the shaders via cleanup_shaders() and drop the
 * references on idct->matrix and idct->transpose.
 * NOTE(review): no call to cleanup_state() is visible in this listing; a
 * line appears to be missing between cleanup_shaders() and the reference
 * drops - confirm that the state objects are released.
 * NOTE(review): the return type and the braces are not visible.
 */
775 vl_idct_cleanup(struct vl_idct
*idct
)
777 cleanup_shaders(idct
);
780 pipe_sampler_view_reference(&idct
->matrix
, NULL
);
781 pipe_sampler_view_reference(&idct
->transpose
, NULL
);
/*
 * vl_idct_init_buffer: initialise a per-stream IDCT buffer.
 *
 *   idct          initialised IDCT object; supplies matrix and transpose
 *   buffer        object to fill in; it is zeroed first
 *   source        sampler view on the source texture; must not be NULL
 *   intermediate  sampler view on the intermediate texture; must not be NULL
 *
 * The buffer takes its own reference on four sampler views (matrix, source,
 * transpose, intermediate) and then sets up the mismatch framebuffer
 * (init_source) and the stage-1 framebuffer (init_intermediate).
 *
 * NOTE(review): the statements guarded by the two `if (!init_...)` tests,
 * the return statements, the return type and the braces are not visible in
 * this listing, so the failure behaviour cannot be confirmed.
 */
785 vl_idct_init_buffer(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
,
786 struct pipe_sampler_view
*source
,
787 struct pipe_sampler_view
*intermediate
)
789 assert(buffer
&& idct
);
790 assert(source
&& intermediate
);
/* Zeroing first makes the reference helpers below start from NULL slots. */
792 memset(buffer
, 0, sizeof(struct vl_idct_buffer
));
794 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.matrix
, idct
->matrix
);
795 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.source
, source
);
796 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.transpose
, idct
->transpose
);
797 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.intermediate
, intermediate
);
799 if (!init_source(idct
, buffer
))
802 if (!init_intermediate(idct
, buffer
))
/*
 * vl_idct_cleanup_buffer: release everything vl_idct_init_buffer() set up.
 * cleanup_source() and cleanup_intermediate() drop the surfaces and the
 * source/intermediate sampler views; the matrix and transpose sampler view
 * references are dropped here.
 * NOTE(review): the return type and the braces are not visible in this
 * listing.
 */
809 vl_idct_cleanup_buffer(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
811 assert(idct
&& buffer
);
813 cleanup_source(idct
, buffer
);
814 cleanup_intermediate(idct
, buffer
);
816 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.matrix
, NULL
);
817 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.transpose
, NULL
);
/*
 * vl_idct_flush: issue the two draws of the first IDCT stage for `buffer`.
 *
 * Common state bound first: rasterizer state, blend state, the two sampler
 * states and the buffer's stage[0] sampler views.
 *
 * Draw 1 (mismatch control): mismatch framebuffer and viewport, mismatch
 * vertex/fragment shaders, PIPE_PRIM_POINTS with one vertex.
 * Draw 2: regular framebuffer and viewport, idct->vs / idct->fs,
 * PIPE_PRIM_QUADS with four vertices.
 * Both draws pass num_instances as the last argument of
 * util_draw_arrays_instanced().
 *
 * NOTE(review): any assertions or early-outs (for example on
 * num_instances == 0), the return type and the braces are not visible in
 * this listing.
 */
821 vl_idct_flush(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
, unsigned num_instances
)
826 idct
->pipe
->bind_rasterizer_state(idct
->pipe
, idct
->rs_state
);
827 idct
->pipe
->bind_blend_state(idct
->pipe
, idct
->blend
);
828 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
);
829 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[0]);
831 /* mismatch control */
832 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state_mismatch
);
833 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport_mismatch
);
834 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->vs_mismatch
);
835 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->fs_mismatch
);
836 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_POINTS
, 0, 1, 0, num_instances
);
/* Second draw: render into the intermediate framebuffer. */
839 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
);
840 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
);
841 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->vs
);
842 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->fs
);
843 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_QUADS
, 0, 4, 0, num_instances
);
/*
 * vl_idct_prepare_stage2: bind the state the second IDCT stage needs from
 * this module - the rasterizer state, the two sampler states and the
 * buffer's stage[1] sampler views.  No shaders, framebuffer or blend state
 * are bound and nothing is drawn in the visible statements.
 * NOTE(review): the visible text ends with this function; it may continue
 * beyond what is shown.  The return type and the braces are not visible.
 */
847 vl_idct_prepare_stage2(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
853 idct
->pipe
->bind_rasterizer_state(idct
->pipe
, idct
->rs_state
);
854 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
);
855 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[1]);