lima/ppir: enable vectorize optimization
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_video.c
index 5871f590e0e7d25f13b33391fa24cc98c15c6c6d..c5871f8a22111c3ddfb1dd34a9b7325eaff8bb19 100644 (file)
 #include "util/u_sampler.h"
 #include "util/u_format.h"
 
+/*
+ * begin_frame hook: advances the decoder's fence sequence number and passes
+ * the new value to nvc0_decoder_bsp_begin().
+ *
+ * decoder: codec object; cast to struct nouveau_vp3_decoder.
+ * target:  destination buffer; debug builds assert it is PIPE_FORMAT_NV12.
+ * picture: unused here.
+ *
+ * Debug builds also assert that nvc0_decoder_bsp_begin() returned 2.
+ */
+static void
+nvc0_decoder_begin_frame(struct pipe_video_codec *decoder,
+                         struct pipe_video_buffer *target,
+                         struct pipe_picture_desc *picture)
+{
+   struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+   uint32_t comm_seq;
+   ASSERTED unsigned ret = 0; /* used in debug checks */
+
+   assert(dec);
+   assert(target);
+   assert(target->buffer_format == PIPE_FORMAT_NV12);
+
+   /* Touch dec only after the asserts above, so a NULL decoder trips
+    * assert(dec) instead of being dereferenced first. */
+   comm_seq = ++dec->fence_seq;
+   ret = nvc0_decoder_bsp_begin(dec, comm_seq);
+
+   assert(ret == 2);
+}
+
 static void
 nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
                               struct pipe_video_buffer *video_target,
@@ -32,22 +50,35 @@ nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
                               unsigned num_buffers,
                               const void *const *data,
                               const unsigned *num_bytes)
+{
+   struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+   uint32_t comm_seq = dec->fence_seq;
+   ASSERTED unsigned ret = 0; /* used in debug checks */
+
+   assert(decoder);
+
+   ret = nvc0_decoder_bsp_next(dec, comm_seq, num_buffers, data, num_bytes);
+
+   assert(ret == 2);
+}
+
+static void
+nvc0_decoder_end_frame(struct pipe_video_codec *decoder,
+                       struct pipe_video_buffer *video_target,
+                       struct pipe_picture_desc *picture)
 {
    struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
    struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
-   uint32_t comm_seq = ++dec->fence_seq;
+   uint32_t comm_seq = dec->fence_seq;
    union pipe_desc desc;
 
-   unsigned vp_caps, is_ref, ret;
+   unsigned vp_caps, is_ref;
+   ASSERTED unsigned ret; /* used in debug checks */
    struct nouveau_vp3_video_buffer *refs[16] = {};
 
    desc.base = picture;
 
-   assert(target->base.buffer_format == PIPE_FORMAT_NV12);
-
-   ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
-                          num_buffers, data, num_bytes,
-                          &vp_caps, &is_ref, refs);
+   ret = nvc0_decoder_bsp_end(dec, desc, target, comm_seq, &vp_caps, &is_ref, refs);
 
    /* did we decode bitstream correctly? */
    assert(ret == 2);
@@ -164,25 +195,26 @@ nvc0_create_decoder(struct pipe_context *context,
    PUSH_DATA (push[2], dec->ppp->handle);
 
    dec->base.context = context;
+   dec->base.begin_frame = nvc0_decoder_begin_frame;
    dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
+   dec->base.end_frame = nvc0_decoder_end_frame;
 
    for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
       ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
                            0, 1 << 20, &cfg, &dec->bsp_bo[i]);
-   if (!ret)
+   if (!ret) {
+      /* total fudge factor... just has to be bigger for higher bitrates? */
+      unsigned inter_size = align(templ->width * templ->height * 2, 4 << 20);
       ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
-                           0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
+                           0x100, inter_size, &cfg, &dec->inter_bo[0]);
+   }
    if (!ret) {
-      if (!kepler)
-         nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
-      else
-         ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
-                              0x100, dec->inter_bo[0]->size, &cfg,
-                              &dec->inter_bo[1]);
+      ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+                           0x100, dec->inter_bo[0]->size, &cfg,
+                           &dec->inter_bo[1]);
    }
    if (ret)
       goto fail;
-
    switch (u_reduce_video_profile(templ->profile)) {
    case PIPE_VIDEO_FORMAT_MPEG12: {
       codec = 1;
@@ -266,7 +298,7 @@ nvc0_create_decoder(struct pipe_context *context,
    dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
 
    /* So lets test if the fence is working? */
-   nouveau_pushbuf_space(push[0], 6, 1, 0);
+   nouveau_pushbuf_space(push[0], 16, 1, 0);
    PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
    BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
    PUSH_DATAh(push[0], dec->fence_bo->offset);
@@ -277,7 +309,7 @@ nvc0_create_decoder(struct pipe_context *context,
    PUSH_DATA (push[0], 0);
    PUSH_KICK (push[0]);
 
-   nouveau_pushbuf_space(push[1], 6, 1, 0);
+   nouveau_pushbuf_space(push[1], 16, 1, 0);
    PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
    BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
    PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
@@ -288,7 +320,7 @@ nvc0_create_decoder(struct pipe_context *context,
    PUSH_DATA (push[1], 0);
    PUSH_KICK (push[1]);
 
-   nouveau_pushbuf_space(push[2], 6, 1, 0);
+   nouveau_pushbuf_space(push[2], 16, 1, 0);
    PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
    BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
    PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));