[g3dvl] implement mismatch control inside idct shaders
author Christian König <deathsimple@vodafone.de>
Wed, 25 May 2011 23:50:44 +0000 (01:50 +0200)
committer Christian König <deathsimple@vodafone.de>
Sun, 29 May 2011 18:07:57 +0000 (20:07 +0200)
src/gallium/auxiliary/vl/vl_idct.c
src/gallium/auxiliary/vl/vl_idct.h
src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
src/gallium/auxiliary/vl/vl_mpeg12_decoder.c

index 602258ece86d8f6029c0fa307988ac89f30efbea..45180499e2eb0ce961d3d413cf418e99380c6eab 100644 (file)
@@ -138,6 +138,121 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
    ureg_release_temporary(shader, tmp);
 }
 
+/**
+ * Build the vertex shader used by the mismatch control pass.
+ *
+ * The shader scales the VS_I_VPOS input into the output position and hands
+ * two source addresses, produced by calc_addr(), to the fragment stage
+ * through the VS_O_L_ADDR0 / VS_O_L_ADDR1 outputs.
+ *
+ * Returns the result of ureg_create_shader_and_destroy() for idct->pipe, or
+ * NULL when ureg_create() fails.
+ */
+static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_tex;
+   struct ureg_dst o_vpos, o_addr[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   /*
+    * VS_I_RECT is never read by this shader. The declaration call is kept so
+    * the set of declared inputs stays exactly what it was; only the unused
+    * local that used to hold its result is gone.
+    */
+   ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_tex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   /*
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (buffer_width, buffer_height)
+    *
+    * o_vpos.xy = vpos * scale + scale = (vpos + 1) * scale
+    * o_vpos.zw = 1
+    *
+    * t_tex.xy = vpos * scale
+    * o_addr = calc_addr(t_tex, ...)
+    *
+    */
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
+
+   ureg_release_temporary(shader, t_tex);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+/**
+ * Build the fragment shader used by the mismatch control pass.
+ *
+ * Eight rows are fetched from sampler 0 (two temporaries per row, filled by
+ * fetch_four()) and summed. From that sum and from the W component of the
+ * last fetched vector the shader derives a correction of +-1/32768, possibly
+ * multiplied down to zero, and adds it to that W component. X, Y and Z of
+ * the last fetched vector are written back unchanged.
+ *
+ * Returns the result of ureg_create_shader_and_destroy() for idct->pipe, or
+ * NULL when ureg_create() fails.
+ */
+static void *
+create_mismatch_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src addr[2];
+   struct ureg_dst row[8][2];
+   struct ureg_dst sum, tail;
+   struct ureg_dst fragment;
+   unsigned n;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /* two temporaries per row: first the fetch addresses, later the fetched data */
+   for (n = 0; n < 8; ++n) {
+      row[n][0] = ureg_DECL_temporary(shader);
+      row[n][1] = ureg_DECL_temporary(shader);
+   }
+
+   /* row n gets the incoming address pair advanced by n (see increment_addr) */
+   for (n = 0; n < 8; ++n)
+      increment_addr(shader, row[n], addr, false, false, n, idct->buffer_height);
+
+   /* the address temporaries are overwritten in place with the fetched values */
+   for (n = 0; n < 8; ++n) {
+      struct ureg_src fetch_addr[2] = { ureg_src(row[n][0]), ureg_src(row[n][1]) };
+      fetch_four(shader, row[n], fetch_addr, ureg_DECL_sampler(shader, 0), false);
+   }
+
+   /* accumulate rows 1..7 into row 0, keeping the two halves separate */
+   for (n = 1; n < 8; ++n) {
+      ureg_ADD(shader, row[0][0], ureg_src(row[0][0]), ureg_src(row[n][0]));
+      ureg_ADD(shader, row[0][1], ureg_src(row[0][1]), ureg_src(row[n][1]));
+   }
+
+   /* from here on registers are only passed by value, so plain copies serve as aliases */
+   sum = row[0][0];
+   tail = row[7][1];
+
+   /*
+    * Fold both halves together, then reduce |sum| * 2^14 over the four
+    * components with a dot product against a constant vector.
+    * NOTE(review): presumably the samples are 16 bit normalized, so that the
+    * factor 2^14 yields half the integer value and FRC below exposes its
+    * parity -- confirm against the source texture format.
+    */
+   ureg_ADD(shader, sum, ureg_src(sum), ureg_src(row[0][1]));
+   ureg_DP4(shader, sum, ureg_abs(ureg_src(sum)), ureg_imm1f(shader, 1 << 14));
+
+   /* W is replaced by the scaled magnitude of the tail vector's W component */
+   ureg_MUL(shader, ureg_writemask(sum, TGSI_WRITEMASK_W), ureg_abs(ureg_src(tail)), ureg_imm1f(shader, 1 << 14));
+   ureg_FRC(shader, sum, ureg_src(sum));
+   /* 1.0 where the fractional part is below 0.5, otherwise 0.0 */
+   ureg_SGT(shader, sum, ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(sum)));
+
+   /* W: +1/32768 where the flag above is set, -1/32768 where it is not ... */
+   ureg_CMP(shader, ureg_writemask(sum, TGSI_WRITEMASK_W), ureg_negate(ureg_src(sum)),
+            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
+   /* ... and multiplied by the X flag, which zeroes it when that flag is 0.0 */
+   ureg_MUL(shader, ureg_writemask(sum, TGSI_WRITEMASK_W), ureg_src(sum),
+            ureg_scalar(ureg_src(sum), TGSI_SWIZZLE_X));
+
+   /* pass X, Y, Z through untouched; only W receives the correction */
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tail));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(sum), ureg_src(tail));
+
+   for (n = 0; n < 8; ++n) {
+      ureg_release_temporary(shader, row[n][0]);
+      ureg_release_temporary(shader, row[n][1]);
+   }
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
 static void *
 create_stage1_vert_shader(struct vl_idct *idct)
 {
@@ -239,14 +354,14 @@ create_stage1_frag_shader(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i) {
       struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
-      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1), false);
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
    }
 
    for (i = 0; i < idct->nr_of_render_targets; ++i) {
       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
 
       struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
-      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0), false);
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -324,8 +439,8 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
    r[0] = ureg_DECL_temporary(shader);
    r[1] = ureg_DECL_temporary(shader);
 
-   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0), false);
-   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1), true);
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
 
    matrix_mul(shader, fragment, l, r);
 
@@ -338,6 +453,14 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
 static bool
 init_shaders(struct vl_idct *idct)
 {
+   idct->vs_mismatch = create_mismatch_vert_shader(idct);
+   if (!idct->vs_mismatch)
+      goto error_vs_mismatch;
+
+   idct->fs_mismatch = create_mismatch_frag_shader(idct);
+   if (!idct->fs_mismatch)
+      goto error_fs_mismatch;
+
    idct->vs = create_stage1_vert_shader(idct);
    if (!idct->vs)
       goto error_vs;
@@ -352,12 +475,20 @@ error_fs:
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
 
 error_vs:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+
+error_fs_mismatch:
+   idct->pipe->delete_vs_state(idct->pipe, idct->fs);
+
+error_vs_mismatch:
    return false;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); /* vertex shader of the mismatch control pass */
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); /* fragment shader of the mismatch control pass */
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->fs);
 }
@@ -373,6 +504,7 @@ init_state(struct vl_idct *idct)
    assert(idct);
 
    memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.point_size = 1;
    rs_state.gl_rasterization_rules = true;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
    if (!idct->rs_state)
@@ -441,6 +573,45 @@ cleanup_state(struct vl_idct *idct)
    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
 }
 
+/**
+ * Set up the per-buffer state of the mismatch control pass.
+ *
+ * A surface is created on the texture behind the source sampler view and
+ * installed as the only colour buffer of fb_state_mismatch; the mismatch
+ * framebuffer and viewport are sized to that texture.
+ *
+ * The sampler view references themselves are taken by the caller and are not
+ * touched here, not even on failure.
+ *
+ * Returns false when the surface cannot be created, true otherwise.
+ */
+static bool
+init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_resource *tex;
+   struct pipe_surface surf_templ;
+
+   assert(idct && buffer);
+
+   tex = buffer->sampler_views.individual.source->texture;
+
+   buffer->fb_state_mismatch.width = tex->width0;
+   buffer->fb_state_mismatch.height = tex->height0;
+   buffer->fb_state_mismatch.nr_cbufs = 1;
+
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   surf_templ.format = tex->format;
+   surf_templ.u.tex.first_layer = 0;
+   surf_templ.u.tex.last_layer = 0;
+   /* the same texture is sampled from and rendered to by the mismatch pass */
+   surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
+   /* without this check a NULL colour buffer would be bound later on flush */
+   if (!buffer->fb_state_mismatch.cbufs[0])
+      return false;
+
+   buffer->viewport_mismatch.scale[0] = tex->width0;
+   buffer->viewport_mismatch.scale[1] = tex->height0;
+   buffer->viewport_mismatch.scale[2] = 1;
+   buffer->viewport_mismatch.scale[3] = 1;
+
+   return true;
+}
+
+/**
+ * Drop what init_source() and the buffer setup acquired for the source
+ * stage: the colour surface of the mismatch framebuffer and the reference
+ * to the source sampler view.
+ */
+static void
+cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_surface **mismatch_surface;
+   struct pipe_sampler_view **source_view;
+
+   assert(idct && buffer);
+
+   mismatch_surface = &buffer->fb_state_mismatch.cbufs[0];
+   source_view = &buffer->sampler_views.individual.source;
+
+   /* referencing NULL releases the held object and clears the slot */
+   pipe_surface_reference(mismatch_surface, NULL);
+   pipe_sampler_view_reference(source_view, NULL);
+}
+
 static bool
 init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
@@ -470,6 +641,8 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    buffer->viewport.scale[0] = tex->width0;
    buffer->viewport.scale[1] = tex->height0;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
 
    return true;
 
@@ -609,13 +782,11 @@ vl_idct_cleanup(struct vl_idct *idct)
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination)
+                    struct pipe_sampler_view *intermediate)
 {
    assert(buffer);
    assert(idct);
    assert(source);
-   assert(destination);
 
    memset(buffer, 0, sizeof(struct vl_idct_buffer));
 
@@ -624,15 +795,11 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
 
-   if (!init_intermediate(idct, buffer))
+   if (!init_source(idct, buffer))
       return false;
 
-   buffer->viewport.scale[2] = 1;
-   buffer->viewport.scale[3] = 1;
-   buffer->viewport.translate[0] = 0;
-   buffer->viewport.translate[1] = 0;
-   buffer->viewport.translate[2] = 0;
-   buffer->viewport.translate[3] = 0;
+   if (!init_intermediate(idct, buffer))
+      return false;
 
    return true;
 }
@@ -640,13 +807,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 void
 vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   unsigned i;
-
    assert(idct && buffer);
 
-   for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
-
+   cleanup_source(idct, buffer); /* releases the mismatch surface and the source sampler view */
    cleanup_intermediate(idct, buffer);
 }
 
@@ -659,11 +822,18 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
    idct->pipe->bind_blend_state(idct->pipe, idct->blend);
    idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
+
+   /* mismatch control */
+   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
+   idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
+   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
+   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
 
    /* first stage */
    idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
    idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
    idct->pipe->bind_vs_state(idct->pipe, idct->vs);
    idct->pipe->bind_fs_state(idct->pipe, idct->fs);
    util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
index f5a1e5d9b73b15f63f8f2e78b442613230a33d98..119a53dbf27c68505d86b37eb9dc8fe2401c5d50 100644 (file)
@@ -48,6 +48,7 @@ struct vl_idct
 
    void *samplers[2];
 
+   void *vs_mismatch, *fs_mismatch;
    void *vs, *fs;
 
    struct pipe_sampler_view *matrix;
@@ -57,7 +58,10 @@ struct vl_idct
 /* a set of buffers to work with */
 struct vl_idct_buffer
 {
+   struct pipe_viewport_state viewport_mismatch;
    struct pipe_viewport_state viewport;
+
+   struct pipe_framebuffer_state fb_state_mismatch;
    struct pipe_framebuffer_state fb_state;
 
    union
@@ -65,8 +69,8 @@ struct vl_idct_buffer
       struct pipe_sampler_view *all[4];
       struct pipe_sampler_view *stage[2][2];
       struct {
-         struct pipe_sampler_view *matrix, *source;
-         struct pipe_sampler_view *transpose, *intermediate;
+         struct pipe_sampler_view *source, *matrix;
+         struct pipe_sampler_view *intermediate, *transpose;
       } individual;
    } sampler_views;
 };
@@ -99,8 +103,7 @@ vl_idct_cleanup(struct vl_idct *idct);
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination);
+                    struct pipe_sampler_view *intermediate);
 
 /* cleanup a buffer of an idct instance */
 void
index 508bb9fab19061c51e468240df233a6987bc0cf4..bf9b6cd6b1123cafed6188ca3c668b6795431cb6 100644 (file)
@@ -724,11 +724,9 @@ static inline void
 get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -751,7 +749,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -778,7 +775,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -811,7 +807,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break;   /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);       /* dump end of block code */
 }
 
@@ -819,11 +814,9 @@ static inline void
 get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab * tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -845,7 +838,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
             SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             bs->vlc.buf <<= 1;
             vl_vlc_needbits(&bs->vlc);
@@ -871,7 +863,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
             SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
@@ -905,7 +896,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break;   /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 4);       /* dump end of block code */
 }
 
@@ -913,11 +903,9 @@ static inline void
 get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = -1;
-   mismatch = 1;
 
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
@@ -946,7 +934,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -977,7 +964,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1009,7 +995,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       }
       break;   /* illegal, check needed to avoid buffer overflow */
    }
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);       /* dump end of block code */
 }
 
index 4337e0833836e2faa1725d8cf474d4ac5d4e474b..f96d7f0e2b3b2f5503bf39fbc502eadefd0f0499 100644 (file)
@@ -147,7 +147,6 @@ static bool
 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
-   struct pipe_surface **idct_surfaces;
 
    struct vl_mpeg12_decoder *dec;
 
@@ -165,14 +164,10 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   idct_surfaces = dec->mc_source->get_surfaces(dec->mc_source);
-   if (!idct_surfaces)
-      goto error_surfaces;
-
    for (i = 0; i < 3; ++i)
       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                                &buffer->idct[i], idct_source_sv[i],
-                               mc_source_sv[i], idct_surfaces[i]))
+                               mc_source_sv[i]))
          goto error_plane;
 
    return true;
@@ -181,7 +176,6 @@ error_plane:
    for (; i > 0; --i)
       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 
-error_surfaces:
 error_mc_source_sv:
 error_source_sv:
    return false;