#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
#define STAGE1_SCALE 4.0f
-#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
+#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)
-#define NR_RENDER_TARGETS 1
+#define NR_RENDER_TARGETS 4
enum VS_INPUT
{
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+ ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+
ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+#if NR_RENDER_TARGETS == 1
+ ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#else
+ ureg_MUL(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z),
+ ureg_scalar(vrect, TGSI_SWIZZLE_X),
+ ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+#endif
+
ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
ureg_release_temporary(shader, t_vpos);
static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
struct ureg_src tc, struct ureg_src sampler,
- struct ureg_src start, struct ureg_src block,
- bool right_side, bool transposed, float size)
+ struct ureg_src start, bool right_side,
+ bool transposed, float size)
{
struct ureg_dst t_tc;
unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
}
-
-#if NR_RENDER_TARGETS == 8
- ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
-#else
- ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-#endif
+ ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), tc);
ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
start[0] = ureg_imm1f(shader, 0.0f);
start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
- fetch_four(shader, l, block, sampler[0], start[0], block, false, false, BLOCK_WIDTH / 4);
- fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, idct->buffer_height / 4);
+ fetch_four(shader, l, block, sampler[0], start[0], false, false, BLOCK_WIDTH / 4);
+ fetch_four(shader, r, tex, sampler[1], start[1], true, false, idct->buffer_height / 4);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
for (i = 0; i < NR_RENDER_TARGETS; ++i)
fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
- ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
for (i = 0; i < 4; ++i) {
- fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
- ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
- ureg_MUL(shader, l[i][1], ureg_src(l[i][1]), ureg_imm1f(shader, STAGE1_SCALE));
- if(i != 3)
+ if(i == 0)
+ ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
+ else
ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
+
+ fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], false, false, idct->buffer_width / 4);
}
for (i = 0; i < NR_RENDER_TARGETS; ++i) {
-#if NR_RENDER_TARGETS == 8
- ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
- fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
-#elif NR_RENDER_TARGETS == 1
- fetch_four(shader, r, block, sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
+#if NR_RENDER_TARGETS == 1
+ fetch_four(shader, r, block, sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
#else
-#error invalid number of render targets
+ ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X),
+ ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
+ block);
+ fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
#endif
for (j = 0; j < 4; ++j) {
struct pipe_box rect =
{
0, 0, 0,
- BLOCK_WIDTH,
+ BLOCK_WIDTH / 4,
BLOCK_HEIGHT,
1
};
buf_transfer = pipe->get_transfer
(
pipe, matrix,
- u_subresource(0, 0),
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
&rect
);
pitch = buf_transfer->stride / sizeof(float);
f = pipe->transfer_map(pipe, buf_transfer);
for(i = 0; i < BLOCK_HEIGHT; ++i)
for(j = 0; j < BLOCK_WIDTH; ++j)
- f[i * pitch + j] = const_matrix[j][i]; // transpose
+ // transpose and pre-scale by STAGE1_SCALE
+ f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
pipe->transfer_unmap(pipe, buf_transfer);
pipe->transfer_destroy(pipe, buf_transfer);
bool
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst)
{
+ struct pipe_surface template;
+
unsigned i;
assert(buffer);
buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
for(i = 0; i < NR_RENDER_TARGETS; ++i) {
- buffer->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
- idct->pipe->screen, buffer->textures.individual.intermediate, 0, 0, i,
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+ memset(&template, 0, sizeof(template));
+ template.format = buffer->textures.individual.intermediate->format;
+ template.u.tex.first_layer = i;
+ template.u.tex.last_layer = i;
+ template.usage = PIPE_BIND_RENDER_TARGET;
+ buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
+ idct->pipe, buffer->textures.individual.intermediate,
+ &template);
}
buffer->fb_state[1].width = buffer->destination->width0;
buffer->fb_state[1].height = buffer->destination->height0;
buffer->fb_state[1].nr_cbufs = 1;
- buffer->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
- idct->pipe->screen, buffer->destination, 0, 0, 0,
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
+ memset(&template, 0, sizeof(template));
+ template.format = buffer->destination->format;
+ template.usage = PIPE_BIND_RENDER_TARGET;
+ buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface(
+ idct->pipe, buffer->destination, &template);
for(i = 0; i < 2; ++i) {
buffer->viewport[i].scale[2] = 1;
assert(buffer);
for(i = 0; i < NR_RENDER_TARGETS; ++i) {
- idct->pipe->screen->tex_surface_destroy(buffer->fb_state[0].cbufs[i]);
+ idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[0].cbufs[i]);
}
- idct->pipe->screen->tex_surface_destroy(buffer->fb_state[1].cbufs[0]);
+ idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]);
cleanup_textures(idct, buffer);
cleanup_vertex_buffers(idct, buffer);
buffer->tex_transfer = idct->pipe->get_transfer
(
idct->pipe, buffer->textures.individual.source,
- u_subresource(0, 0),
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
&rect
);