/**************************************************************************
 *
 * Copyright 2010 Christian König
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
30 #include <pipe/p_context.h>
31 #include <pipe/p_screen.h>
33 #include <util/u_draw.h>
34 #include <util/u_sampler.h>
35 #include <util/u_memory.h>
37 #include <tgsi/tgsi_ureg.h>
39 #include "vl_defines.h"
41 #include "vl_vertex_buffers.h"
/*
 * The 8x8 DCT matrix. Every entry is the hex representation of a float,
 * so the table is stored as 32-bit words. The values are equal to:
 *
 *   for (i = 0; i < 8; ++i)
 *      for (j = 0; j < 8; ++j)
 *         if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *         else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   /* i = 0 */
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3,
     0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   /* i = 1 */
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4,
     0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   /* i = 2 */
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e,
     0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   /* i = 3 */
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd,
     0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   /* i = 4 */
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1,
     0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   /* i = 5 */
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32,
     0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   /* i = 6 */
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07,
     0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   /* i = 7 */
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0,
     0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 }
};
72 calc_addr(struct ureg_program
*shader
, struct ureg_dst addr
[2],
73 struct ureg_src tc
, struct ureg_src start
, bool right_side
,
74 bool transposed
, float size
)
76 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
77 unsigned sw_start
= right_side
? TGSI_SWIZZLE_Y
: TGSI_SWIZZLE_X
;
79 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
80 unsigned sw_tc
= right_side
? TGSI_SWIZZLE_X
: TGSI_SWIZZLE_Y
;
83 * addr[0..1].(start) = right_side ? start.x : tc.x
84 * addr[0..1].(tc) = right_side ? tc.y : start.y
86 * addr[1].(start) += 1.0f / scale
88 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_start
), ureg_scalar(start
, sw_start
));
89 ureg_MOV(shader
, ureg_writemask(addr
[0], wm_tc
), ureg_scalar(tc
, sw_tc
));
91 ureg_ADD(shader
, ureg_writemask(addr
[1], wm_start
), ureg_scalar(start
, sw_start
), ureg_imm1f(shader
, 1.0f
/ size
));
92 ureg_MOV(shader
, ureg_writemask(addr
[1], wm_tc
), ureg_scalar(tc
, sw_tc
));
96 increment_addr(struct ureg_program
*shader
, struct ureg_dst daddr
[2],
97 struct ureg_src saddr
[2], bool right_side
, bool transposed
,
100 unsigned wm_start
= (right_side
== transposed
) ? TGSI_WRITEMASK_X
: TGSI_WRITEMASK_Y
;
101 unsigned wm_tc
= (right_side
== transposed
) ? TGSI_WRITEMASK_Y
: TGSI_WRITEMASK_X
;
104 * daddr[0..1].(start) = saddr[0..1].(start)
105 * daddr[0..1].(tc) = saddr[0..1].(tc)
108 ureg_MOV(shader
, ureg_writemask(daddr
[0], wm_start
), saddr
[0]);
109 ureg_ADD(shader
, ureg_writemask(daddr
[0], wm_tc
), saddr
[0], ureg_imm1f(shader
, pos
/ size
));
110 ureg_MOV(shader
, ureg_writemask(daddr
[1], wm_start
), saddr
[1]);
111 ureg_ADD(shader
, ureg_writemask(daddr
[1], wm_tc
), saddr
[1], ureg_imm1f(shader
, pos
/ size
));
115 fetch_four(struct ureg_program
*shader
, struct ureg_dst m
[2], struct ureg_src addr
[2],
116 struct ureg_src sampler
, bool resource3d
)
118 ureg_TEX(shader
, m
[0], resource3d
? TGSI_TEXTURE_3D
: TGSI_TEXTURE_2D
, addr
[0], sampler
);
119 ureg_TEX(shader
, m
[1], resource3d
? TGSI_TEXTURE_3D
: TGSI_TEXTURE_2D
, addr
[1], sampler
);
123 matrix_mul(struct ureg_program
*shader
, struct ureg_dst dst
, struct ureg_dst l
[2], struct ureg_dst r
[2])
127 tmp
= ureg_DECL_temporary(shader
);
130 * tmp.xy = dot4(m[0][0..1], m[1][0..1])
131 * dst = tmp.x + tmp.y
133 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_src(l
[0]), ureg_src(r
[0]));
134 ureg_DP4(shader
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_src(l
[1]), ureg_src(r
[1]));
135 ureg_ADD(shader
, dst
,
136 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_X
),
137 ureg_scalar(ureg_src(tmp
), TGSI_SWIZZLE_Y
));
139 ureg_release_temporary(shader
, tmp
);
143 create_mismatch_vert_shader(struct vl_idct
*idct
)
145 struct ureg_program
*shader
;
146 struct ureg_src vrect
, vpos
;
147 struct ureg_src scale
;
148 struct ureg_dst t_tex
;
149 struct ureg_dst o_vpos
, o_addr
[2];
151 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
155 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
156 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
158 t_tex
= ureg_DECL_temporary(shader
);
160 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
162 o_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
);
163 o_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
);
166 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
168 * t_vpos = vpos + 7 / BLOCK_WIDTH
169 * o_vpos.xy = t_vpos * scale
171 * o_addr = calc_addr(...)
175 scale
= ureg_imm2f(shader
,
176 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
177 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
179 ureg_MAD(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), vpos
, scale
, scale
);
180 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
182 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), vpos
, scale
);
183 calc_addr(shader
, o_addr
, ureg_src(t_tex
), ureg_src(t_tex
), false, false, idct
->buffer_width
/ 4);
185 ureg_release_temporary(shader
, t_tex
);
189 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
193 create_mismatch_frag_shader(struct vl_idct
*idct
)
195 struct ureg_program
*shader
;
197 struct ureg_src addr
[2];
199 struct ureg_dst m
[8][2];
200 struct ureg_dst fragment
;
204 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
208 addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
209 addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
211 fragment
= ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, 0);
213 for (i
= 0; i
< 8; ++i
) {
214 m
[i
][0] = ureg_DECL_temporary(shader
);
215 m
[i
][1] = ureg_DECL_temporary(shader
);
218 for (i
= 0; i
< 8; ++i
) {
219 increment_addr(shader
, m
[i
], addr
, false, false, i
, idct
->buffer_height
);
222 for (i
= 0; i
< 8; ++i
) {
223 struct ureg_src s_addr
[2];
224 s_addr
[0] = ureg_src(m
[i
][0]);
225 s_addr
[1] = ureg_src(m
[i
][1]);
226 fetch_four(shader
, m
[i
], s_addr
, ureg_DECL_sampler(shader
, 0), false);
229 for (i
= 1; i
< 8; ++i
) {
230 ureg_ADD(shader
, m
[0][0], ureg_src(m
[0][0]), ureg_src(m
[i
][0]));
231 ureg_ADD(shader
, m
[0][1], ureg_src(m
[0][1]), ureg_src(m
[i
][1]));
234 ureg_ADD(shader
, m
[0][0], ureg_src(m
[0][0]), ureg_src(m
[0][1]));
235 ureg_DP4(shader
, m
[0][0], ureg_abs(ureg_src(m
[0][0])), ureg_imm1f(shader
, 1 << 14));
237 ureg_MUL(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_abs(ureg_src(m
[7][1])), ureg_imm1f(shader
, 1 << 14));
238 ureg_FRC(shader
, m
[0][0], ureg_src(m
[0][0]));
239 ureg_SGT(shader
, m
[0][0], ureg_imm1f(shader
, 0.5f
), ureg_abs(ureg_src(m
[0][0])));
241 ureg_CMP(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_negate(ureg_src(m
[0][0])),
242 ureg_imm1f(shader
, 1.0f
/ (1 << 15)), ureg_imm1f(shader
, -1.0f
/ (1 << 15)));
243 ureg_MUL(shader
, ureg_writemask(m
[0][0], TGSI_WRITEMASK_W
), ureg_src(m
[0][0]),
244 ureg_scalar(ureg_src(m
[0][0]), TGSI_SWIZZLE_X
));
246 ureg_MOV(shader
, ureg_writemask(fragment
, TGSI_WRITEMASK_XYZ
), ureg_src(m
[7][1]));
247 ureg_ADD(shader
, ureg_writemask(fragment
, TGSI_WRITEMASK_W
), ureg_src(m
[0][0]), ureg_src(m
[7][1]));
249 for (i
= 0; i
< 8; ++i
) {
250 ureg_release_temporary(shader
, m
[i
][0]);
251 ureg_release_temporary(shader
, m
[i
][1]);
256 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
260 create_stage1_vert_shader(struct vl_idct
*idct
)
262 struct ureg_program
*shader
;
263 struct ureg_src vrect
, vpos
;
264 struct ureg_src scale
;
265 struct ureg_dst t_tex
, t_start
;
266 struct ureg_dst o_vpos
, o_l_addr
[2], o_r_addr
[2];
268 shader
= ureg_create(TGSI_PROCESSOR_VERTEX
);
272 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
273 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
275 t_tex
= ureg_DECL_temporary(shader
);
276 t_start
= ureg_DECL_temporary(shader
);
278 o_vpos
= ureg_DECL_output(shader
, TGSI_SEMANTIC_POSITION
, VS_O_VPOS
);
280 o_l_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
);
281 o_l_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
);
283 o_r_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
);
284 o_r_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
);
287 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
289 * t_vpos = vpos + vrect
290 * o_vpos.xy = t_vpos * scale
293 * o_l_addr = calc_addr(...)
294 * o_r_addr = calc_addr(...)
298 scale
= ureg_imm2f(shader
,
299 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
300 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
302 ureg_ADD(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), vpos
, vrect
);
303 ureg_MUL(shader
, ureg_writemask(t_tex
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
), scale
);
305 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_XY
), ureg_src(t_tex
));
306 ureg_MOV(shader
, ureg_writemask(o_vpos
, TGSI_WRITEMASK_ZW
), ureg_imm1f(shader
, 1.0f
));
308 ureg_MUL(shader
, ureg_writemask(t_start
, TGSI_WRITEMASK_XY
), vpos
, scale
);
310 calc_addr(shader
, o_l_addr
, ureg_src(t_tex
), ureg_src(t_start
), false, false, idct
->buffer_width
/ 4);
311 calc_addr(shader
, o_r_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), true, true, BLOCK_WIDTH
/ 4);
313 ureg_release_temporary(shader
, t_tex
);
314 ureg_release_temporary(shader
, t_start
);
318 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
322 create_stage1_frag_shader(struct vl_idct
*idct
)
324 struct ureg_program
*shader
;
326 struct ureg_src l_addr
[2], r_addr
[2];
328 struct ureg_dst l
[4][2], r
[2];
329 struct ureg_dst
*fragment
;
333 shader
= ureg_create(TGSI_PROCESSOR_FRAGMENT
);
337 fragment
= MALLOC(idct
->nr_of_render_targets
* sizeof(struct ureg_dst
));
339 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
340 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
342 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
343 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
345 for (i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
346 fragment
[i
] = ureg_DECL_output(shader
, TGSI_SEMANTIC_COLOR
, i
);
348 for (i
= 0; i
< 4; ++i
) {
349 l
[i
][0] = ureg_DECL_temporary(shader
);
350 l
[i
][1] = ureg_DECL_temporary(shader
);
353 r
[0] = ureg_DECL_temporary(shader
);
354 r
[1] = ureg_DECL_temporary(shader
);
356 for (i
= 0; i
< 4; ++i
) {
357 increment_addr(shader
, l
[i
], l_addr
, false, false, i
- 2, idct
->buffer_height
);
360 for (i
= 0; i
< 4; ++i
) {
361 struct ureg_src s_addr
[2];
362 s_addr
[0] = ureg_src(l
[i
][0]);
363 s_addr
[1] = ureg_src(l
[i
][1]);
364 fetch_four(shader
, l
[i
], s_addr
, ureg_DECL_sampler(shader
, 0), false);
367 for (i
= 0; i
< idct
->nr_of_render_targets
; ++i
) {
368 struct ureg_src s_addr
[2];
370 increment_addr(shader
, r
, r_addr
, true, true, i
- (signed)idct
->nr_of_render_targets
/ 2, BLOCK_HEIGHT
);
372 s_addr
[0] = ureg_src(r
[0]);
373 s_addr
[1] = ureg_src(r
[1]);
374 fetch_four(shader
, r
, s_addr
, ureg_DECL_sampler(shader
, 1), false);
376 for (j
= 0; j
< 4; ++j
) {
377 matrix_mul(shader
, ureg_writemask(fragment
[i
], TGSI_WRITEMASK_X
<< j
), l
[j
], r
);
381 for (i
= 0; i
< 4; ++i
) {
382 ureg_release_temporary(shader
, l
[i
][0]);
383 ureg_release_temporary(shader
, l
[i
][1]);
385 ureg_release_temporary(shader
, r
[0]);
386 ureg_release_temporary(shader
, r
[1]);
392 return ureg_create_shader_and_destroy(shader
, idct
->pipe
);
396 vl_idct_stage2_vert_shader(struct vl_idct
*idct
, struct ureg_program
*shader
,
397 unsigned first_output
, struct ureg_dst tex
)
399 struct ureg_src vrect
, vpos
;
400 struct ureg_src scale
;
401 struct ureg_dst t_start
;
402 struct ureg_dst o_l_addr
[2], o_r_addr
[2];
404 vrect
= ureg_DECL_vs_input(shader
, VS_I_RECT
);
405 vpos
= ureg_DECL_vs_input(shader
, VS_I_VPOS
);
407 t_start
= ureg_DECL_temporary(shader
);
411 o_l_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_L_ADDR0
);
412 o_l_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_L_ADDR1
);
414 o_r_addr
[0] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_R_ADDR0
);
415 o_r_addr
[1] = ureg_DECL_output(shader
, TGSI_SEMANTIC_GENERIC
, first_output
+ VS_O_R_ADDR1
);
417 scale
= ureg_imm2f(shader
,
418 (float)BLOCK_WIDTH
/ idct
->buffer_width
,
419 (float)BLOCK_HEIGHT
/ idct
->buffer_height
);
421 ureg_MUL(shader
, ureg_writemask(tex
, TGSI_WRITEMASK_Z
),
422 ureg_scalar(vrect
, TGSI_SWIZZLE_X
),
423 ureg_imm1f(shader
, BLOCK_WIDTH
/ idct
->nr_of_render_targets
));
424 ureg_MUL(shader
, ureg_writemask(t_start
, TGSI_WRITEMASK_XY
), vpos
, scale
);
426 calc_addr(shader
, o_l_addr
, vrect
, ureg_imm1f(shader
, 0.0f
), false, false, BLOCK_WIDTH
/ 4);
427 calc_addr(shader
, o_r_addr
, ureg_src(tex
), ureg_src(t_start
), true, false, idct
->buffer_height
/ 4);
429 ureg_MOV(shader
, ureg_writemask(o_r_addr
[0], TGSI_WRITEMASK_Z
), ureg_src(tex
));
430 ureg_MOV(shader
, ureg_writemask(o_r_addr
[1], TGSI_WRITEMASK_Z
), ureg_src(tex
));
434 vl_idct_stage2_frag_shader(struct vl_idct
*idct
, struct ureg_program
*shader
,
435 unsigned first_input
, struct ureg_dst fragment
)
437 struct ureg_src l_addr
[2], r_addr
[2];
439 struct ureg_dst l
[2], r
[2];
443 l_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_L_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
444 l_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_L_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
446 r_addr
[0] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_R_ADDR0
, TGSI_INTERPOLATE_LINEAR
);
447 r_addr
[1] = ureg_DECL_fs_input(shader
, TGSI_SEMANTIC_GENERIC
, first_input
+ VS_O_R_ADDR1
, TGSI_INTERPOLATE_LINEAR
);
449 l
[0] = ureg_DECL_temporary(shader
);
450 l
[1] = ureg_DECL_temporary(shader
);
451 r
[0] = ureg_DECL_temporary(shader
);
452 r
[1] = ureg_DECL_temporary(shader
);
454 fetch_four(shader
, l
, l_addr
, ureg_DECL_sampler(shader
, 1), false);
455 fetch_four(shader
, r
, r_addr
, ureg_DECL_sampler(shader
, 0), true);
457 matrix_mul(shader
, fragment
, l
, r
);
459 ureg_release_temporary(shader
, l
[0]);
460 ureg_release_temporary(shader
, l
[1]);
461 ureg_release_temporary(shader
, r
[0]);
462 ureg_release_temporary(shader
, r
[1]);
466 init_shaders(struct vl_idct
*idct
)
468 idct
->vs_mismatch
= create_mismatch_vert_shader(idct
);
469 if (!idct
->vs_mismatch
)
470 goto error_vs_mismatch
;
472 idct
->fs_mismatch
= create_mismatch_frag_shader(idct
);
473 if (!idct
->fs_mismatch
)
474 goto error_fs_mismatch
;
476 idct
->vs
= create_stage1_vert_shader(idct
);
480 idct
->fs
= create_stage1_frag_shader(idct
);
487 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs
);
490 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs_mismatch
);
493 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->fs
);
500 cleanup_shaders(struct vl_idct
*idct
)
502 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs_mismatch
);
503 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->fs_mismatch
);
504 idct
->pipe
->delete_vs_state(idct
->pipe
, idct
->vs
);
505 idct
->pipe
->delete_fs_state(idct
->pipe
, idct
->fs
);
509 init_state(struct vl_idct
*idct
)
511 struct pipe_blend_state blend
;
512 struct pipe_rasterizer_state rs_state
;
513 struct pipe_sampler_state sampler
;
518 memset(&rs_state
, 0, sizeof(rs_state
));
519 rs_state
.point_size
= 1;
520 rs_state
.gl_rasterization_rules
= true;
521 idct
->rs_state
= idct
->pipe
->create_rasterizer_state(idct
->pipe
, &rs_state
);
525 memset(&blend
, 0, sizeof blend
);
527 blend
.independent_blend_enable
= 0;
528 blend
.rt
[0].blend_enable
= 0;
529 blend
.rt
[0].rgb_func
= PIPE_BLEND_ADD
;
530 blend
.rt
[0].rgb_src_factor
= PIPE_BLENDFACTOR_ONE
;
531 blend
.rt
[0].rgb_dst_factor
= PIPE_BLENDFACTOR_ONE
;
532 blend
.rt
[0].alpha_func
= PIPE_BLEND_ADD
;
533 blend
.rt
[0].alpha_src_factor
= PIPE_BLENDFACTOR_ONE
;
534 blend
.rt
[0].alpha_dst_factor
= PIPE_BLENDFACTOR_ONE
;
535 blend
.logicop_enable
= 0;
536 blend
.logicop_func
= PIPE_LOGICOP_CLEAR
;
537 /* Needed to allow color writes to FB, even if blending disabled */
538 blend
.rt
[0].colormask
= PIPE_MASK_RGBA
;
540 idct
->blend
= idct
->pipe
->create_blend_state(idct
->pipe
, &blend
);
544 for (i
= 0; i
< 2; ++i
) {
545 memset(&sampler
, 0, sizeof(sampler
));
546 sampler
.wrap_s
= PIPE_TEX_WRAP_REPEAT
;
547 sampler
.wrap_t
= PIPE_TEX_WRAP_REPEAT
;
548 sampler
.wrap_r
= PIPE_TEX_WRAP_REPEAT
;
549 sampler
.min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
550 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
551 sampler
.mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
552 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
553 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
554 sampler
.normalized_coords
= 1;
555 idct
->samplers
[i
] = idct
->pipe
->create_sampler_state(idct
->pipe
, &sampler
);
556 if (!idct
->samplers
[i
])
563 for (i
= 0; i
< 2; ++i
)
564 if (idct
->samplers
[i
])
565 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
[i
]);
567 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
570 idct
->pipe
->delete_blend_state(idct
->pipe
, idct
->blend
);
577 cleanup_state(struct vl_idct
*idct
)
581 for (i
= 0; i
< 2; ++i
)
582 idct
->pipe
->delete_sampler_state(idct
->pipe
, idct
->samplers
[i
]);
584 idct
->pipe
->delete_rasterizer_state(idct
->pipe
, idct
->rs_state
);
585 idct
->pipe
->delete_blend_state(idct
->pipe
, idct
->blend
);
589 init_source(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
591 struct pipe_resource
*tex
;
592 struct pipe_surface surf_templ
;
594 assert(idct
&& buffer
);
596 tex
= buffer
->sampler_views
.individual
.source
->texture
;
598 buffer
->fb_state_mismatch
.width
= tex
->width0
;
599 buffer
->fb_state_mismatch
.height
= tex
->height0
;
600 buffer
->fb_state_mismatch
.nr_cbufs
= 1;
602 memset(&surf_templ
, 0, sizeof(surf_templ
));
603 surf_templ
.format
= tex
->format
;
604 surf_templ
.u
.tex
.first_layer
= 0;
605 surf_templ
.u
.tex
.last_layer
= 0;
606 surf_templ
.usage
= PIPE_BIND_SAMPLER_VIEW
| PIPE_BIND_RENDER_TARGET
;
607 buffer
->fb_state_mismatch
.cbufs
[0] = idct
->pipe
->create_surface(idct
->pipe
, tex
, &surf_templ
);
609 buffer
->viewport_mismatch
.scale
[0] = tex
->width0
;
610 buffer
->viewport_mismatch
.scale
[1] = tex
->height0
;
611 buffer
->viewport_mismatch
.scale
[2] = 1;
612 buffer
->viewport_mismatch
.scale
[3] = 1;
618 cleanup_source(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
620 assert(idct
&& buffer
);
622 pipe_surface_reference(&buffer
->fb_state_mismatch
.cbufs
[0], NULL
);
624 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.source
, NULL
);
628 init_intermediate(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
630 struct pipe_resource
*tex
;
631 struct pipe_surface surf_templ
;
634 assert(idct
&& buffer
);
636 tex
= buffer
->sampler_views
.individual
.intermediate
->texture
;
638 buffer
->fb_state
.width
= tex
->width0
;
639 buffer
->fb_state
.height
= tex
->height0
;
640 buffer
->fb_state
.nr_cbufs
= idct
->nr_of_render_targets
;
641 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
) {
642 memset(&surf_templ
, 0, sizeof(surf_templ
));
643 surf_templ
.format
= tex
->format
;
644 surf_templ
.u
.tex
.first_layer
= i
;
645 surf_templ
.u
.tex
.last_layer
= i
;
646 surf_templ
.usage
= PIPE_BIND_SAMPLER_VIEW
| PIPE_BIND_RENDER_TARGET
;
647 buffer
->fb_state
.cbufs
[i
] = idct
->pipe
->create_surface(
648 idct
->pipe
, tex
, &surf_templ
);
650 if (!buffer
->fb_state
.cbufs
[i
])
654 buffer
->viewport
.scale
[0] = tex
->width0
;
655 buffer
->viewport
.scale
[1] = tex
->height0
;
656 buffer
->viewport
.scale
[2] = 1;
657 buffer
->viewport
.scale
[3] = 1;
662 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
663 pipe_surface_reference(&buffer
->fb_state
.cbufs
[i
], NULL
);
669 cleanup_intermediate(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
)
673 assert(idct
&& buffer
);
675 for(i
= 0; i
< idct
->nr_of_render_targets
; ++i
)
676 pipe_surface_reference(&buffer
->fb_state
.cbufs
[i
], NULL
);
678 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.intermediate
, NULL
);
681 struct pipe_sampler_view
*
682 vl_idct_upload_matrix(struct pipe_context
*pipe
, float scale
)
684 struct pipe_resource tex_templ
, *matrix
;
685 struct pipe_sampler_view sv_templ
, *sv
;
686 struct pipe_transfer
*buf_transfer
;
687 unsigned i
, j
, pitch
;
690 struct pipe_box rect
=
700 memset(&tex_templ
, 0, sizeof(tex_templ
));
701 tex_templ
.target
= PIPE_TEXTURE_2D
;
702 tex_templ
.format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
703 tex_templ
.last_level
= 0;
704 tex_templ
.width0
= 2;
705 tex_templ
.height0
= 8;
706 tex_templ
.depth0
= 1;
707 tex_templ
.array_size
= 1;
708 tex_templ
.usage
= PIPE_USAGE_IMMUTABLE
;
709 tex_templ
.bind
= PIPE_BIND_SAMPLER_VIEW
;
712 matrix
= pipe
->screen
->resource_create(pipe
->screen
, &tex_templ
);
716 buf_transfer
= pipe
->get_transfer
719 0, PIPE_TRANSFER_WRITE
| PIPE_TRANSFER_DISCARD
,
725 pitch
= buf_transfer
->stride
/ sizeof(float);
727 f
= pipe
->transfer_map(pipe
, buf_transfer
);
731 for(i
= 0; i
< BLOCK_HEIGHT
; ++i
)
732 for(j
= 0; j
< BLOCK_WIDTH
; ++j
)
733 // transpose and scale
734 f
[i
* pitch
+ j
] = ((const float (*)[8])const_matrix
)[j
][i
] * scale
;
736 pipe
->transfer_unmap(pipe
, buf_transfer
);
737 pipe
->transfer_destroy(pipe
, buf_transfer
);
739 memset(&sv_templ
, 0, sizeof(sv_templ
));
740 u_sampler_view_default_template(&sv_templ
, matrix
, matrix
->format
);
741 sv
= pipe
->create_sampler_view(pipe
, matrix
, &sv_templ
);
742 pipe_resource_reference(&matrix
, NULL
);
749 pipe
->transfer_destroy(pipe
, buf_transfer
);
752 pipe_resource_reference(&matrix
, NULL
);
758 bool vl_idct_init(struct vl_idct
*idct
, struct pipe_context
*pipe
,
759 unsigned buffer_width
, unsigned buffer_height
,
760 unsigned nr_of_render_targets
,
761 struct pipe_sampler_view
*matrix
,
762 struct pipe_sampler_view
*transpose
)
764 assert(idct
&& pipe
);
765 assert(matrix
&& transpose
);
768 idct
->buffer_width
= buffer_width
;
769 idct
->buffer_height
= buffer_height
;
770 idct
->nr_of_render_targets
= nr_of_render_targets
;
772 pipe_sampler_view_reference(&idct
->matrix
, matrix
);
773 pipe_sampler_view_reference(&idct
->transpose
, transpose
);
775 if(!init_shaders(idct
))
778 if(!init_state(idct
)) {
779 cleanup_shaders(idct
);
787 vl_idct_cleanup(struct vl_idct
*idct
)
789 cleanup_shaders(idct
);
792 pipe_sampler_view_reference(&idct
->matrix
, NULL
);
793 pipe_sampler_view_reference(&idct
->transpose
, NULL
);
797 vl_idct_init_buffer(struct vl_idct
*idct
, struct vl_idct_buffer
*buffer
,
798 struct pipe_sampler_view
*source
,
799 struct pipe_sampler_view
*intermediate
)
801 assert(buffer
&& idct
);
802 assert(source
&& intermediate
);
804 memset(buffer
, 0, sizeof(struct vl_idct_buffer
));
808 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.matrix
, idct
->matrix
);
809 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.source
, source
);
810 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.transpose
, idct
->transpose
);
811 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.intermediate
, intermediate
);
813 if (!init_source(idct
, buffer
))
816 if (!init_intermediate(idct
, buffer
))
823 vl_idct_cleanup_buffer(struct vl_idct_buffer
*buffer
)
827 cleanup_source(buffer
->idct
, buffer
);
828 cleanup_intermediate(buffer
->idct
, buffer
);
830 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.matrix
, NULL
);
831 pipe_sampler_view_reference(&buffer
->sampler_views
.individual
.transpose
, NULL
);
835 vl_idct_flush(struct vl_idct_buffer
*buffer
, unsigned num_instances
)
837 struct vl_idct
*idct
;
842 idct
->pipe
->bind_rasterizer_state(idct
->pipe
, idct
->rs_state
);
843 idct
->pipe
->bind_blend_state(idct
->pipe
, idct
->blend
);
844 idct
->pipe
->bind_fragment_sampler_states(idct
->pipe
, 2, idct
->samplers
);
845 idct
->pipe
->set_fragment_sampler_views(idct
->pipe
, 2, buffer
->sampler_views
.stage
[0]);
847 /* mismatch control */
848 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state_mismatch
);
849 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport_mismatch
);
850 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->vs_mismatch
);
851 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->fs_mismatch
);
852 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_POINTS
, 0, 1, 0, num_instances
);
855 idct
->pipe
->set_framebuffer_state(idct
->pipe
, &buffer
->fb_state
);
856 idct
->pipe
->set_viewport_state(idct
->pipe
, &buffer
->viewport
);
857 idct
->pipe
->bind_vs_state(idct
->pipe
, idct
->vs
);
858 idct
->pipe
->bind_fs_state(idct
->pipe
, idct
->fs
);
859 util_draw_arrays_instanced(idct
->pipe
, PIPE_PRIM_QUADS
, 0, 4, 0, num_instances
);
863 vl_idct_prepare_stage2(struct vl_idct_buffer
*buffer
)
868 buffer
->idct
->pipe
->bind_rasterizer_state(buffer
->idct
->pipe
, buffer
->idct
->rs_state
);
869 buffer
->idct
->pipe
->bind_fragment_sampler_states(buffer
->idct
->pipe
, 2, buffer
->idct
->samplers
);
870 buffer
->idct
->pipe
->set_fragment_sampler_views(buffer
->idct
->pipe
, 2, buffer
->sampler_views
.stage
[1]);