Merge remote branch 'origin/master' into pipe-video
[mesa.git] / src / gallium / auxiliary / vl / vl_idct.c
index 0a81134a789b33fb03825d1d9f0f2000530174bf..ae80dc0a2748585a0030d9e73f7893f2969b6a5f 100644 (file)
@@ -43,9 +43,9 @@
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 #define STAGE1_SCALE 4.0f
-#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
+#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)
 
-#define NR_RENDER_TARGETS 1
+#define NR_RENDER_TARGETS 4
 
 enum VS_INPUT
 {
@@ -119,7 +119,17 @@ create_vert_shader(struct vl_idct *idct)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+
    ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+#if NR_RENDER_TARGETS == 1
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#else
+   ureg_MUL(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), 
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+#endif
+
    ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    ureg_release_temporary(shader, t_vpos);
@@ -132,8 +142,8 @@ create_vert_shader(struct vl_idct *idct)
 static void
 fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
            struct ureg_src tc, struct ureg_src sampler,
-           struct ureg_src start, struct ureg_src block,
-           bool right_side, bool transposed, float size)
+           struct ureg_src start, bool right_side,
+           bool transposed, float size)
 {
    struct ureg_dst t_tc;
    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
@@ -155,12 +165,7 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
       ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
       ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
    }
-
-#if NR_RENDER_TARGETS == 8
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
-#else
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-#endif
+   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), tc);
 
    ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
    ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
@@ -218,8 +223,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, l, block, sampler[0], start[0], block, false, false, BLOCK_WIDTH / 4);
-   fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, idct->buffer_height / 4);
+   fetch_four(shader, l, block, sampler[0], start[0], false, false, BLOCK_WIDTH / 4);
+   fetch_four(shader, r, tex, sampler[1], start[1], true, false, idct->buffer_height / 4);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -270,25 +275,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
    for (i = 0; i < 4; ++i) {
-      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
-      ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
-      ureg_MUL(shader, l[i][1], ureg_src(l[i][1]), ureg_imm1f(shader, STAGE1_SCALE));
-      if(i != 3)
+      if(i == 0)
+         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
+      else
          ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
             ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
+
+      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], false, false, idct->buffer_width / 4);
    }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
 
-#if NR_RENDER_TARGETS == 8
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
-#elif NR_RENDER_TARGETS == 1
-      fetch_four(shader, r, block, sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
+#if NR_RENDER_TARGETS == 1
+      fetch_four(shader, r, block, sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
 #else
-#error invalid number of render targets
+      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), 
+         ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
+         block);
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
 #endif
 
       for (j = 0; j < 4; ++j) {
@@ -496,7 +501,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    struct pipe_box rect =
    {
       0, 0, 0,
-      BLOCK_WIDTH,
+      BLOCK_WIDTH / 4,
       BLOCK_HEIGHT,
       1
    };
@@ -518,8 +523,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    buf_transfer = pipe->get_transfer
    (
       pipe, matrix,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
    pitch = buf_transfer->stride / sizeof(float);
@@ -527,7 +531,8 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    f = pipe->transfer_map(pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch + j] = const_matrix[j][i]; // transpose
+         // transpose and scale
+         f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
@@ -573,6 +578,8 @@ vl_idct_cleanup(struct vl_idct *idct)
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst)
 {
+   struct pipe_surface template;
+
    unsigned i;
 
    assert(buffer);
@@ -601,18 +608,26 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct
 
    buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
    for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      buffer->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
-         idct->pipe->screen, buffer->textures.individual.intermediate, 0, 0, i,
-         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+      memset(&template, 0, sizeof(template));
+      template.format = buffer->textures.individual.intermediate->format;
+      template.u.tex.first_layer = i;
+      template.u.tex.last_layer = i;
+      template.usage = PIPE_BIND_RENDER_TARGET;
+      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
+         idct->pipe, buffer->textures.individual.intermediate,
+         &template);
    }
 
    buffer->fb_state[1].width = buffer->destination->width0;
    buffer->fb_state[1].height = buffer->destination->height0;
 
    buffer->fb_state[1].nr_cbufs = 1;
-   buffer->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
-      idct->pipe->screen, buffer->destination, 0, 0, 0,
-      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
+   memset(&template, 0, sizeof(template));
+   template.format = buffer->destination->format;
+   template.usage = PIPE_BIND_RENDER_TARGET;
+   buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface(
+      idct->pipe, buffer->destination, &template);
 
    for(i = 0; i < 2; ++i) {
       buffer->viewport[i].scale[2] = 1;
@@ -636,10 +651,10 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    assert(buffer);
 
    for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      idct->pipe->screen->tex_surface_destroy(buffer->fb_state[0].cbufs[i]);
+      idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[0].cbufs[i]);
    }
 
-   idct->pipe->screen->tex_surface_destroy(buffer->fb_state[1].cbufs[0]);
+   idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]);
 
    cleanup_textures(idct, buffer);
    cleanup_vertex_buffers(idct, buffer);
@@ -661,8 +676,7 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    buffer->tex_transfer = idct->pipe->get_transfer
    (
       idct->pipe, buffer->textures.individual.source,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );