From 9ba7eac535d309332c4149b544ccf7e87f88c723 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Sun, 2 Dec 2012 12:07:35 +0100 Subject: [PATCH] nvc0: add support for accelerated video decoding through the dedicated engines Currently the use of external firmware is required, with kernel and userspace firmware needed for all Fermi cards except nvd9. Kepler and nvd9 should only require kernel firmware. --- configure.ac | 2 +- src/gallium/drivers/nvc0/Makefile.sources | 5 +- src/gallium/drivers/nvc0/nvc0_video.c | 427 +++++++++++++- src/gallium/drivers/nvc0/nvc0_video.h | 169 +++++- src/gallium/drivers/nvc0/nvc0_video_bsp.c | 423 ++++++++++++++ src/gallium/drivers/nvc0/nvc0_video_ppp.c | 145 +++++ src/gallium/drivers/nvc0/nvc0_video_vp.c | 667 ++++++++++++++++++++++ 7 files changed, 1834 insertions(+), 4 deletions(-) create mode 100644 src/gallium/drivers/nvc0/nvc0_video_bsp.c create mode 100644 src/gallium/drivers/nvc0/nvc0_video_ppp.c create mode 100644 src/gallium/drivers/nvc0/nvc0_video_vp.c diff --git a/configure.ac b/configure.ac index 99a08fd060f..ba4c203e5f9 100644 --- a/configure.ac +++ b/configure.ac @@ -33,7 +33,7 @@ LIBDRM_REQUIRED=2.4.24 LIBDRM_RADEON_REQUIRED=2.4.40 LIBDRM_INTEL_REQUIRED=2.4.38 LIBDRM_NVVIEUX_REQUIRED=2.4.33 -LIBDRM_NOUVEAU_REQUIRED=2.4.33 +LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41" DRI2PROTO_REQUIRED=2.6 GLPROTO_REQUIRED=1.4.14 LIBDRM_XORG_REQUIRED=2.4.24 diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources index 12eedf96231..33b90f290fb 100644 --- a/src/gallium/drivers/nvc0/Makefile.sources +++ b/src/gallium/drivers/nvc0/Makefile.sources @@ -14,4 +14,7 @@ C_SOURCES := \ nvc0_program.c \ nvc0_shader_state.c \ nvc0_query.c \ - nvc0_video.c + nvc0_video.c \ + nvc0_video_bsp.c \ + nvc0_video_vp.c \ + nvc0_video_ppp.c diff --git a/src/gallium/drivers/nvc0/nvc0_video.c b/src/gallium/drivers/nvc0/nvc0_video.c index 5cf16e79b2b..cdb80dba064 100644 --- 
a/src/gallium/drivers/nvc0/nvc0_video.c +++ b/src/gallium/drivers/nvc0/nvc0_video.c @@ -1,5 +1,5 @@ /* - * Copyright 2011 Maarten Lankhorst + * Copyright 2011-2013 Maarten Lankhorst * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -54,6 +54,116 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen, } } +static void +nvc0_decoder_decode_bitstream(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *video_target, + struct pipe_picture_desc *picture, + unsigned num_buffers, + const void *const *data, + const unsigned *num_bytes) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + struct nvc0_video_buffer *target = (struct nvc0_video_buffer *)video_target; + uint32_t comm_seq = ++dec->fence_seq; + union pipe_desc desc; + + unsigned vp_caps, is_ref, ret; + struct nvc0_video_buffer *refs[16] = {}; + + desc.base = picture; + + assert(target->base.buffer_format == PIPE_FORMAT_NV12); + + ret = nvc0_decoder_bsp(dec, desc, target, comm_seq, + num_buffers, data, num_bytes, + &vp_caps, &is_ref, refs); + + /* did we decode bitstream correctly? 
*/ + assert(ret == 2); + + nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + nvc0_decoder_ppp(dec, desc, target, comm_seq); +} + +static void +nvc0_decoder_flush(struct pipe_video_decoder *decoder) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + (void)dec; +} + +static void +nvc0_decoder_begin_frame(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nvc0_decoder_end_frame(struct pipe_video_decoder *decoder, + struct pipe_video_buffer *target, + struct pipe_picture_desc *picture) +{ +} + +static void +nvc0_decoder_destroy(struct pipe_video_decoder *decoder) +{ + struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder; + int i; + + nouveau_bo_ref(NULL, &dec->ref_bo); + nouveau_bo_ref(NULL, &dec->bitplane_bo); + nouveau_bo_ref(NULL, &dec->inter_bo[0]); + nouveau_bo_ref(NULL, &dec->inter_bo[1]); +#ifdef NVC0_DEBUG_FENCE + nouveau_bo_ref(NULL, &dec->fence_bo); +#endif + nouveau_bo_ref(NULL, &dec->fw_bo); + + for (i = 0; i < NVC0_VIDEO_QDEPTH; ++i) + nouveau_bo_ref(NULL, &dec->bsp_bo[i]); + + nouveau_object_del(&dec->bsp); + nouveau_object_del(&dec->vp); + nouveau_object_del(&dec->ppp); + + if (dec->channel[0] != dec->channel[1]) { + for (i = 0; i < 3; ++i) { + nouveau_pushbuf_del(&dec->pushbuf[i]); + nouveau_object_del(&dec->channel[i]); + } + } else { + nouveau_pushbuf_del(dec->pushbuf); + nouveau_object_del(dec->channel); + } + + FREE(dec); +} + +static void nvc0_video_getpath(enum pipe_video_profile profile, char *path) +{ + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + sprintf(path, "/lib/firmware/nouveau/vuc-mpeg12-0"); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: { + sprintf(path, "/lib/firmware/nouveau/vuc-mpeg4-0"); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + sprintf(path, "/lib/firmware/nouveau/vuc-vc1-%u", profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + 
sprintf(path, "/lib/firmware/nouveau/vuc-h264-0"); + break; + } + default: assert(0); + } +} + struct pipe_video_decoder * nvc0_create_decoder(struct pipe_context *context, enum pipe_video_profile profile, @@ -62,6 +172,20 @@ nvc0_create_decoder(struct pipe_context *context, unsigned width, unsigned height, unsigned max_references, bool chunked_decode) { + struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; + struct nvc0_decoder *dec; + struct nouveau_pushbuf **push; + union nouveau_bo_config cfg; + bool kepler = screen->device->chipset >= 0xe0; + + cfg.nvc0.tile_mode = 0x10; + cfg.nvc0.memtype = 0xfe; + + int ret, i; + uint32_t codec = 1, ppp_codec = 3; + uint32_t timeout; + u32 tmp_size = 0; + if (getenv("XVMC_VL")) return vl_create_decoder(context, profile, entrypoint, chroma_format, width, height, @@ -72,6 +196,307 @@ nvc0_create_decoder(struct pipe_context *context, return NULL; } + dec = CALLOC_STRUCT(nvc0_decoder); + if (!dec) + return NULL; + dec->client = screen->client; + + if (!kepler) { + dec->bsp_idx = 5; + dec->vp_idx = 6; + dec->ppp_idx = 7; + } else { + dec->bsp_idx = 2; + dec->vp_idx = 2; + dec->ppp_idx = 2; + } + + for (i = 0; i < 3; ++i) + if (i && !kepler) { + dec->channel[i] = dec->channel[0]; + dec->pushbuf[i] = dec->pushbuf[0]; + } else { + void *data; + u32 size; + struct nvc0_fifo nvc0_args = {}; + struct nve0_fifo nve0_args = {}; + + if (!kepler) { + size = sizeof(nvc0_args); + data = &nvc0_args; + } else { + unsigned engine[] = { + NVE0_FIFO_ENGINE_BSP, + NVE0_FIFO_ENGINE_VP, + NVE0_FIFO_ENGINE_PPP + }; + + nve0_args.engine = engine[i]; + size = sizeof(nve0_args); + data = &nve0_args; + } + + ret = nouveau_object_new(&screen->device->object, 0, + NOUVEAU_FIFO_CHANNEL_CLASS, + data, size, &dec->channel[i]); + + if (!ret) + ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, + 32 * 1024, true, &dec->pushbuf[i]); + if (ret) + break; + } + push = dec->pushbuf; + + if (!kepler) { + if (!ret) + ret = 
nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); + } else { + if (!ret) + ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); + if (!ret) + ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); + if (!ret) + ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); + } + if (ret) + goto fail; + + BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[0], dec->bsp->handle); + + BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[1], dec->vp->handle); + + BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push[2], dec->ppp->handle); + + dec->base.context = context; + dec->base.profile = profile; + dec->base.entrypoint = entrypoint; + dec->base.chroma_format = chroma_format; + dec->base.width = width; + dec->base.height = height; + dec->base.max_references = max_references; + dec->base.destroy = nvc0_decoder_destroy; + dec->base.flush = nvc0_decoder_flush; + dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; + dec->base.begin_frame = nvc0_decoder_begin_frame; + dec->base.end_frame = nvc0_decoder_end_frame; + + for (i = 0; i < NVC0_VIDEO_QDEPTH && !ret; ++i) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0, 1 << 20, &cfg, &dec->bsp_bo[i]); + if (!ret) + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); + if (!ret) { + if (!kepler) + nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); + else + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, + 0x100, dec->inter_bo[0]->size, &cfg, + &dec->inter_bo[1]); + } + if (ret) + goto fail; + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + codec = 1; + assert(max_references <= 2); + break; + } 
+ case PIPE_VIDEO_CODEC_MPEG4: { + codec = 4; + tmp_size = mb(height)*16 * mb(width)*16; + assert(max_references <= 2); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + ppp_codec = codec = 2; + tmp_size = mb(height)*16 * mb(width)*16; + assert(max_references <= 2); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + codec = 3; + dec->tmp_stride = 16 * mb_half(width) * nvc0_video_align(height) * 3 / 2; + tmp_size = dec->tmp_stride * (max_references + 1); + assert(max_references <= 16); + break; + } + default: + fprintf(stderr, "invalid codec\n"); + goto fail; + } + + if (screen->device->chipset < 0xd0) { /* chipsets below 0xd0 load a firmware image from disk into fw_bo */ + int fd; + char path[PATH_MAX]; + ssize_t r; + uint32_t *end, endval; + + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x4000, &cfg, &dec->fw_bo); + if (!ret) + ret = nouveau_bo_map(dec->fw_bo, NOUVEAU_BO_WR, dec->client); + if (ret) + goto fail; + + nvc0_video_getpath(profile, path); + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "opening firmware file %s failed: %m\n", path); + goto fw_fail; + } + r = read(fd, dec->fw_bo->map, 0x4000); + if (r < 0) { + fprintf(stderr, "reading firmware file %s failed: %m\n", path); /* print first: %m uses errno, which close() may overwrite */ + close(fd); + goto fw_fail; + } + close(fd); /* the image now lives in fw_bo; release the descriptor once, for every later path */ + + if (r == 0x4000) { + fprintf(stderr, "firmware file %s too large!\n", path); + goto fw_fail; + } + + if (!r || (r & 0xff)) { /* reject an empty file too: the trailer scan below would start before the mapping */ + fprintf(stderr, "firmware file %s wrong size!\n", path); + goto fw_fail; + } + + end = dec->fw_bo->map + r - 4; /* last 32-bit word that was read */ + endval = *end; + while (end > (uint32_t *)dec->fw_bo->map && endval == *end) /* skip the trailing run of identical words, never below the mapping */ + end--; + + r = (intptr_t)end - (intptr_t)dec->fw_bo->map + 4; + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: { + assert((r & 0xff) == 0xe0); + dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); + break; + } + case PIPE_VIDEO_CODEC_MPEG4: { + assert((r & 0xff) == 0xe0); + dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); + break; + } + case PIPE_VIDEO_CODEC_VC1: { + assert((r & 0xff) == 0xac); + dec->fw_sizes = (0x3ac<<16) | (r - 0x3ac); + break; + } + case 
PIPE_VIDEO_CODEC_MPEG4_AVC: { + assert((r & 0xff) == 0x70); + dec->fw_sizes = (0x370<<16) | (r - 0x370); + break; + } + default: + goto fw_fail; + } + munmap(dec->fw_bo->map, dec->fw_bo->size); + dec->fw_bo->map = NULL; + } + + if (codec != 3) { + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + 0x400, &cfg, &dec->bitplane_bo); + if (ret) + goto fail; + } + + dec->ref_stride = mb(width)*16 * (mb_half(height)*32 + nvc0_video_align(height)/2); + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, + dec->ref_stride * (max_references+2) + tmp_size, + &cfg, &dec->ref_bo); + if (ret) + goto fail; + + timeout = 0; + + BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); + PUSH_DATA (push[0], codec); + PUSH_DATA (push[0], timeout); + + BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); + PUSH_DATA (push[1], codec); + PUSH_DATA (push[1], timeout); + + BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); + PUSH_DATA (push[2], ppp_codec); + PUSH_DATA (push[2], timeout); + + ++dec->fence_seq; + +#if NVC0_DEBUG_FENCE + ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, + 0, 0x1000, &cfg, &dec->fence_bo); + if (ret) + goto fail; + + nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); + dec->fence_map = dec->fence_bo->map; + dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; + dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); + + /* So lets test if the fence is working? 
*/ + BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); + PUSH_DATAh(push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_bo->offset); + PUSH_DATA (push[0], dec->fence_seq); + + BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); + PUSH_DATA (push[0], 1); + PUSH_KICK (push[0]); + + BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); + PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); + PUSH_DATA (push[1], dec->fence_seq); + + BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); + PUSH_DATA (push[1], 1); + PUSH_KICK (push[1]); + + BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); + PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); + PUSH_DATA (push[2], dec->fence_seq); + + BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); + PUSH_DATA (push[2], 1); + PUSH_KICK (push[2]); + + usleep(100); + while (dec->fence_seq > dec->fence_map[0] || /* BSP fence word (byte offset 0x00) */ + dec->fence_seq > dec->fence_map[4] || /* VP fence word (byte offset 0x10) */ + dec->fence_seq > dec->fence_map[8]) { /* PPP fence word (byte offset 0x20); keep polling until all three have caught up */ + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); + usleep(100); + } + debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); +#endif + + return &dec->base; + +fw_fail: + debug_printf("Cannot create decoder without firmware..\n"); + nvc0_decoder_destroy(&dec->base); + return NULL; + +fail: + debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); + nvc0_decoder_destroy(&dec->base); return NULL; } diff --git a/src/gallium/drivers/nvc0/nvc0_video.h b/src/gallium/drivers/nvc0/nvc0_video.h index e2cfc3d5902..4cc0ebbb605 100644 --- a/src/gallium/drivers/nvc0/nvc0_video.h +++ b/src/gallium/drivers/nvc0/nvc0_video.h @@ -1,5 +1,5 @@ /* - * Copyright 2011 Maarten Lankhorst + * Copyright 2011-2013 Maarten Lankhorst * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,6 +29,30 @@ 
#include "util/u_video.h" +#define SLICE_SIZE 0x200 +#define VP_OFFSET 0x200 +#define COMM_OFFSET 0x500 + +//#define NVC0_DEBUG_FENCE 1 + +#ifdef NVC0_DEBUG_FENCE +# define NVC0_VIDEO_QDEPTH 1 +#else +# define NVC0_VIDEO_QDEPTH 2 +#endif + +#define SUBC_BSP(m) dec->bsp_idx, (m) +#define SUBC_VP(m) dec->vp_idx, (m) +#define SUBC_PPP(m) dec->ppp_idx, (m) + +union pipe_desc { + struct pipe_picture_desc *base; + struct pipe_mpeg12_picture_desc *mpeg12; + struct pipe_mpeg4_picture_desc *mpeg4; + struct pipe_vc1_picture_desc *vc1; + struct pipe_h264_picture_desc *h264; +}; + struct nvc0_video_buffer { struct pipe_video_buffer base; unsigned num_planes, valid_ref; @@ -38,6 +62,79 @@ struct nvc0_video_buffer { struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; }; +struct nvc0_decoder { + struct pipe_video_decoder base; + struct nouveau_client *client; + struct nouveau_object *channel[3], *bsp, *vp, *ppp; + struct nouveau_pushbuf *pushbuf[3]; + +#ifdef NVC0_DEBUG_FENCE + /* dump fence and comm, as needed.. */ + unsigned *fence_map; + struct comm *comm; + + struct nouveau_bo *fence_bo; +#endif + + struct nouveau_bo *fw_bo, *bitplane_bo; + + // array size max_references + 2, contains unpostprocessed images + // added at the end of ref_bo is a tmp array + // tmp is an array for h264, with each member being used for a ref frame or current + // target.. size = (((mb(w)*((mb(h)+1)&~1))+3)>>2)<<8 * (max_references+1) + // for other codecs, it simply seems that size = w*h is enough + // unsure what it's supposed to contain.. + struct nouveau_bo *ref_bo; + + struct nouveau_bo *inter_bo[2]; + + struct nouveau_bo *bsp_bo[NVC0_VIDEO_QDEPTH]; + + // bo's used by each cycle: + + // bsp_bo: contains raw bitstream data and parameters for BSP and VP. + // inter_bo: contains data shared between BSP and VP + // ref_bo: reference image data, used by PPP and VP + // bitplane_bo: contain bitplane data (similar to ref_bo), used by BSP only + // fw_bo: used by VP only. 
+ + // Needed amount of copies in optimal case: + // 2 copies of inter_bo, VP would process the last inter_bo, while BSP is + // writing out a new set. + // NVC0_VIDEO_QDEPTH copies of bsp_bo. We don't want to block the pipeline ever, + // and give shaders a chance to run as well. + + struct { + struct nvc0_video_buffer *vidbuf; + unsigned last_used; + unsigned field_pic_flag : 1; + unsigned decoded_top : 1; + unsigned decoded_bottom : 1; + } refs[17]; + unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride; + + unsigned bsp_idx, vp_idx, ppp_idx; +}; + +struct comm { + uint32_t bsp_cur_index; // 000 + uint32_t byte_ofs; // 004 + uint32_t status[0x10]; // 008 + uint32_t pos[0x10]; // 048 + uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted + + uint32_t pvp_cur_index; // 100 + uint32_t acked_byte_ofs; // 104 + uint32_t status_vp[0x10]; // 108 + uint16_t mb_y[0x10]; //148 + uint32_t pvp_stage; // 168 0xeeXX + uint16_t parse_endpos_index; // 16c + uint16_t irq_index; // 16e + uint8_t irq_470[0x10]; // 170 + uint32_t irq_pos[0x10]; // 180 + uint32_t parse_endpos[0x10]; // 1c0 +}; + static INLINE uint32_t nvc0_video_align(uint32_t h) { return ((h+0x3f)&~0x3f); @@ -52,3 +149,73 @@ static INLINE uint32_t mb_half(uint32_t coord) { return (coord + 0x1f)>>5; } + +static INLINE uint64_t +nvc0_video_addr(struct nvc0_decoder *dec, struct nvc0_video_buffer *target) +{ + uint64_t ret; + if (target) + ret = dec->ref_stride * target->valid_ref; + else + ret = dec->ref_stride * (dec->base.max_references+1); + return dec->ref_bo->offset + ret; +} + +static INLINE void +nvc0_decoder_ycbcr_offsets(struct nvc0_decoder *dec, uint32_t *y2, + uint32_t *cbcr, uint32_t *cbcr2) +{ + uint32_t w = mb(dec->base.width), size; + *y2 = mb_half(dec->base.height)*w; + *cbcr = *y2 * 2; + *cbcr2 = *cbcr + w * (nvc0_video_align(dec->base.height)>>6); + + /* The check here should never fail because it means a bug + * in the code rather than a bug in hardware.. 
+ */ + size = (2 * (*cbcr2 - *cbcr) + *cbcr) << 8; + if (size > dec->ref_stride) { + debug_printf("Overshot ref_stride (%u) with size %u and ofs (%u,%u,%u)\n", + dec->ref_stride, size, *y2<<8, *cbcr<<8, *cbcr2<<8); + *y2 = *cbcr = *cbcr2 = 0; + assert(size <= dec->ref_stride); + } +} + +static INLINE void +nvc0_decoder_inter_sizes(struct nvc0_decoder *dec, uint32_t slice_count, + uint32_t *slice_size, uint32_t *bucket_size, + uint32_t *ring_size) +{ + *slice_size = (SLICE_SIZE * slice_count)>>8; + if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG12) + *bucket_size = 0; + else + *bucket_size = mb(dec->base.width) * 3; + *ring_size = (dec->inter_bo[0]->size >> 8) - *bucket_size - *slice_size; +} + +extern unsigned +nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, + union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, + unsigned *caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void +nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq, + unsigned caps, unsigned is_ref, + struct nvc0_video_buffer *refs[16]); + +extern void +nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, unsigned comm_seq); diff --git a/src/gallium/drivers/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nvc0/nvc0_video_bsp.c new file mode 100644 index 00000000000..798b4bfacac --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_video_bsp.c @@ -0,0 +1,423 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation 
files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nvc0_video.h" + +struct strparm_bsp { + uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi + uint32_t w1[4]; // bit 8-24 addr_lo + uint32_t unk20; // should be idx * 0x8000000, bitstream offset + uint32_t do_crypto_crap; // set to 0 +}; + +struct mpeg12_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t picture_structure; + uint8_t picture_coding_type; + uint8_t intra_dc_precision; + uint8_t frame_pred_frame_dct; + uint8_t concealment_motion_vectors; + uint8_t intra_vlc_format; + uint16_t pad; + uint8_t f_code[2][2]; +}; + +struct mpeg4_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t vop_time_increment_size; + uint8_t interlaced; + uint8_t resync_marker_disable; +}; + +struct vc1_picparm_bsp { + uint16_t width; + uint16_t height; + uint8_t profile; // 04 0 simple, 1 main, 2 advanced + uint8_t postprocflag; // 05 + uint8_t pulldown; // 06 + uint8_t interlaced; // 07 + uint8_t tfcntrflag; // 08 + uint8_t finterpflag; // 09 + uint8_t psf; // 0a + uint8_t pad; // 0b + uint8_t multires; // 0c + uint8_t 
syncmarker; // 0d + uint8_t rangered; // 0e + uint8_t maxbframes; // 0f + uint8_t dquant; // 10 + uint8_t panscan_flag; // 11 + uint8_t refdist_flag; // 12 + uint8_t quantizer; // 13 + uint8_t extended_mv; // 14 + uint8_t extended_dmv; // 15 + uint8_t overlap; // 16 + uint8_t vstransform; // 17 +}; + +struct h264_picparm_bsp { + // 00 + uint32_t unk00; + // 04 + uint32_t log2_max_frame_num_minus4; // 04 checked + uint32_t pic_order_cnt_type; // 08 checked + uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked + uint32_t delta_pic_order_always_zero_flag; // 10, or unknown + + uint32_t frame_mbs_only_flag; // 14, always 1? + uint32_t direct_8x8_inference_flag; // 18, always 1? + uint32_t width_mb; // 1c checked + uint32_t height_mb; // 20 checked + // 24 + //struct picparm2 + uint32_t entropy_coding_mode_flag; // 00, checked + uint32_t pic_order_present_flag; // 04 checked + uint32_t unk; // 08 seems to be 0? + uint32_t pad1; // 0c seems to be 0? + uint32_t pad2; // 10 always 0 ? + uint32_t num_ref_idx_l0_active_minus1; // 14 always 0? + uint32_t num_ref_idx_l1_active_minus1; // 18 always 0? + uint32_t weighted_pred_flag; // 1c checked + uint32_t weighted_bipred_idc; // 20 checked + uint32_t pic_init_qp_minus26; // 24 checked + uint32_t deblocking_filter_control_present_flag; // 28 always 1? + uint32_t redundant_pic_cnt_present_flag; // 2c always 0? + uint32_t transform_8x8_mode_flag; // 30 checked + uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish + uint8_t field_pic_flag; // 38 checked + uint8_t bottom_field_flag; // 39 checked + uint8_t real_pad[0x1b]; // XX why? 
+}; + +static uint32_t +nvc0_decoder_fill_picparm_mpeg12_bsp(struct nvc0_decoder *dec, + struct pipe_mpeg12_picture_desc *desc, + char *map) +{ + struct mpeg12_picparm_bsp *pic_bsp = (struct mpeg12_picparm_bsp *)map; + int i; + pic_bsp->width = dec->base.width; + pic_bsp->height = dec->base.height; + pic_bsp->picture_structure = desc->picture_structure; + pic_bsp->picture_coding_type = desc->picture_coding_type; + pic_bsp->intra_dc_precision = desc->intra_dc_precision; + pic_bsp->frame_pred_frame_dct = desc->frame_pred_frame_dct; + pic_bsp->concealment_motion_vectors = desc->concealment_motion_vectors; + pic_bsp->intra_vlc_format = desc->intra_vlc_format; + pic_bsp->pad = 0; + for (i = 0; i < 4; ++i) + pic_bsp->f_code[i/2][i%2] = desc->f_code[i/2][i%2] + 1; // FU + + return (desc->num_slices << 4) | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1); +} + +static uint32_t +nvc0_decoder_fill_picparm_mpeg4_bsp(struct nvc0_decoder *dec, + struct pipe_mpeg4_picture_desc *desc, + char *map) +{ + struct mpeg4_picparm_bsp *pic_bsp = (struct mpeg4_picparm_bsp *)map; + uint32_t t, bits = 0; + pic_bsp->width = dec->base.width; + pic_bsp->height = dec->base.height; + assert(desc->vop_time_increment_resolution > 0); + + t = desc->vop_time_increment_resolution - 1; + while (t) { + bits++; + t /= 2; + } + if (!bits) + bits = 1; + t = desc->vop_time_increment_resolution - 1; + pic_bsp->vop_time_increment_size = bits; + pic_bsp->interlaced = desc->interlaced; + pic_bsp->resync_marker_disable = desc->resync_marker_disable; + return 4; +} + +static uint32_t +nvc0_decoder_fill_picparm_vc1_bsp(struct nvc0_decoder *dec, + struct pipe_vc1_picture_desc *d, + char *map) +{ + struct vc1_picparm_bsp *vc = (struct vc1_picparm_bsp *)map; + uint32_t caps = (d->slice_count << 4)&0xfff0; + vc->width = dec->base.width; + vc->height = dec->base.height; + vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE; // 04 + vc->postprocflag = d->postprocflag; + vc->pulldown = d->pulldown; + 
vc->interlaced = d->interlace; + vc->tfcntrflag = d->tfcntrflag; // 08 + vc->finterpflag = d->finterpflag; + vc->psf = d->psf; + vc->pad = 0; + vc->multires = d->multires; // 0c + vc->syncmarker = d->syncmarker; + vc->rangered = d->rangered; + vc->maxbframes = d->maxbframes; + vc->dquant = d->dquant; // 10 + vc->panscan_flag = d->panscan_flag; + vc->refdist_flag = d->refdist_flag; + vc->quantizer = d->quantizer; + vc->extended_mv = d->extended_mv; // 14 + vc->extended_dmv = d->extended_dmv; + vc->overlap = d->overlap; + vc->vstransform = d->vstransform; + return caps | 2; +} + +static uint32_t +nvc0_decoder_fill_picparm_h264_bsp(struct nvc0_decoder *dec, + struct pipe_h264_picture_desc *d, + char *map) +{ + struct h264_picparm_bsp stub_h = {}, *h = &stub_h; + uint32_t caps = (d->slice_count << 4)&0xfff0; + + assert(!(d->slice_count & ~0xfff)); + if (d->slice_count & 0x1000) + caps |= 1 << 20; + + assert(offsetof(struct h264_picparm_bsp, bottom_field_flag) == (0x39 + 0x24)); + h->unk00 = 1; + h->pad1 = h->pad2 = 0; + h->unk = 0; + h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4; + h->frame_mbs_only_flag = d->frame_mbs_only_flag; + h->direct_8x8_inference_flag = d->direct_8x8_inference_flag; + h->width_mb = mb(dec->base.width); + h->height_mb = mb(dec->base.height); + h->entropy_coding_mode_flag = d->entropy_coding_mode_flag; + h->pic_order_present_flag = d->pic_order_present_flag; + h->pic_order_cnt_type = d->pic_order_cnt_type; + h->log2_max_pic_order_cnt_lsb_minus4 = d->log2_max_pic_order_cnt_lsb_minus4; + h->delta_pic_order_always_zero_flag = d->delta_pic_order_always_zero_flag; + h->num_ref_idx_l0_active_minus1 = d->num_ref_idx_l0_active_minus1; + h->num_ref_idx_l1_active_minus1 = d->num_ref_idx_l1_active_minus1; + h->weighted_pred_flag = d->weighted_pred_flag; + h->weighted_bipred_idc = d->weighted_bipred_idc; + h->pic_init_qp_minus26 = d->pic_init_qp_minus26; + h->deblocking_filter_control_present_flag = d->deblocking_filter_control_present_flag; + 
h->redundant_pic_cnt_present_flag = d->redundant_pic_cnt_present_flag; + h->transform_8x8_mode_flag = d->transform_8x8_mode_flag; + h->mb_adaptive_frame_field_flag = d->mb_adaptive_frame_field_flag; + h->field_pic_flag = d->field_pic_flag; + h->bottom_field_flag = d->bottom_field_flag; + memset(h->real_pad, 0, sizeof(h->real_pad)); + *(struct h264_picparm_bsp *)map = *h; + return caps | 3; +} + +#if NVC0_DEBUG_FENCE +static void dump_comm_bsp(struct comm *comm) +{ + unsigned idx = comm->bsp_cur_index & 0xf; + debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs); + debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]); +} +#endif + +unsigned +nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc, + struct nvc0_video_buffer *target, + unsigned comm_seq, unsigned num_buffers, + const void *const *data, const unsigned *num_bytes, + unsigned *vp_caps, unsigned *is_ref, + struct nvc0_video_buffer *refs[16]) +{ + struct nouveau_pushbuf *push = dec->pushbuf[0]; + enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile); + char *bsp; + uint32_t bsp_addr, comm_addr, inter_addr; + uint32_t slice_size, bucket_size, ring_size; + uint32_t endmarker, caps; + struct strparm_bsp *str_bsp; + int ret, i; + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH]; + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1]; + unsigned fence_extra = 0; + struct nouveau_pushbuf_refn bo_refs[] = { + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM }, + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM }, +#ifdef NVC0_DEBUG_FENCE + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART }, +#endif + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, + }; + int num_refs = sizeof(bo_refs)/sizeof(*bo_refs); + +#ifdef NVC0_DEBUG_FENCE + fence_extra = 4; +#endif + + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client); + if (ret) { + debug_printf("map failed: %i %s\n", ret, strerror(-ret)); + return -1; + } + 
bsp = bsp_bo->map; + /* + * 0x000..0x100: picparm_bsp + * 0x200..0x500: picparm_vp + * 0x500..0x700: comm + * 0x700..onward: raw bitstream + */ + + switch (codec){ + case PIPE_VIDEO_CODEC_MPEG12: + endmarker = 0xb7010000; + caps = nvc0_decoder_fill_picparm_mpeg12_bsp(dec, desc.mpeg12, bsp); + break; + case PIPE_VIDEO_CODEC_MPEG4: + endmarker = 0xb1010000; + caps = nvc0_decoder_fill_picparm_mpeg4_bsp(dec, desc.mpeg4, bsp); + break; + case PIPE_VIDEO_CODEC_VC1: { + endmarker = 0x0a010000; + caps = nvc0_decoder_fill_picparm_vc1_bsp(dec, desc.vc1, bsp); + break; + } + case PIPE_VIDEO_CODEC_MPEG4_AVC: { + endmarker = 0x0b010000; + caps = nvc0_decoder_fill_picparm_h264_bsp(dec, desc.h264, bsp); + break; + } + default: assert(0); return -1; + } + + nvc0_decoder_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs); + + PUSH_SPACE(push, 6 + (codec == PIPE_VIDEO_CODEC_MPEG4_AVC ? 9 : 7) + fence_extra + 2); + if (!dec->bitplane_bo) + num_refs--; + nouveau_pushbuf_refn(push, bo_refs, num_refs); + + caps |= 0 << 16; // reset struct comm if flag is set + caps |= 1 << 17; // enable watchdog + caps |= 0 << 18; // do not report error to VP, so it can continue decoding what we have + caps |= 0 << 19; // if enabled, use crypto crap? 
+ bsp += 0x100; + + str_bsp = (struct strparm_bsp *)bsp; + memset(str_bsp, 0, 0x80); + str_bsp->w0[0] = 16; + str_bsp->w1[0] = 0x1; + bsp += 0x100; + /* Reserved for picparm_vp */ + bsp += 0x300; + /* Reserved for comm */ +#if !NVC0_DEBUG_FENCE + memset(bsp, 0, 0x200); +#endif + bsp += 0x200; + for (i = 0; i < num_buffers; ++i) { + memcpy(bsp, data[i], num_bytes[i]); + bsp += num_bytes[i]; + str_bsp->w0[0] += num_bytes[i]; + } + + /* Append end sequence */ + *(uint32_t *)bsp = endmarker; + bsp += 4; + *(uint32_t *)bsp = 0x00000000; + bsp += 4; + *(uint32_t *)bsp = endmarker; + bsp += 4; + *(uint32_t *)bsp = 0x00000000; + + bsp_addr = bsp_bo->offset >> 8; + inter_addr = inter_bo->offset >> 8; + +#if NVC0_DEBUG_FENCE + memset(dec->comm, 0, 0x200); + comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8; +#else + comm_addr = bsp_addr + (COMM_OFFSET>>8); +#endif + + BEGIN_NVC0(push, SUBC_BSP(0x700), 5); + PUSH_DATA (push, caps); // 700 cmd + PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp + PUSH_DATA (push, bsp_addr + 7); // 708 str addr + PUSH_DATA (push, comm_addr); // 70c comm + PUSH_DATA (push, comm_seq); // 710 seq + + if (codec != PIPE_VIDEO_CODEC_MPEG4_AVC) { + u32 bitplane_addr; + + bitplane_addr = dec->bitplane_bo->offset >> 8; + + nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 6); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr + PUSH_DATA (push, ring_size << 8); // 40c interdata_size + PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA + PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE + } else { + nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size); + BEGIN_NVC0(push, SUBC_BSP(0x400), 8); + PUSH_DATA (push, bsp_addr); // 400 picparm addr + PUSH_DATA (push, inter_addr); // 404 interparm addr + PUSH_DATA (push, 
slice_size << 8); // 408 interparm size? + PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr + PUSH_DATA (push, ring_size << 8); // 410 interdata size + PUSH_DATA (push, inter_addr + slice_size); // 414 bucket? + PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted.. + PUSH_DATA (push, 0); // 41c targets + // TODO: Double check 414 / 418 with nvidia trace + } + +#if NVC0_DEBUG_FENCE + BEGIN_NVC0(push, SUBC_BSP(0x240), 3); + PUSH_DATAh(push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_bo->offset); + PUSH_DATA (push, dec->fence_seq); + + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 1); + PUSH_KICK (push); + + { + unsigned spin = 0; + do { + usleep(100); + if ((spin++ & 0xff) == 0xff) { + debug_printf("%u: %u\n", dec->fence_seq, dec->fence_map[0]); + dump_comm_bsp(dec->comm); + } + } while (dec->fence_seq > dec->fence_map[0]); + } + + dump_comm_bsp(dec->comm); + return dec->comm->status[comm_seq & 0xf]; +#else + BEGIN_NVC0(push, SUBC_BSP(0x300), 1); + PUSH_DATA (push, 0); + PUSH_KICK (push); + return 2; +#endif +} diff --git a/src/gallium/drivers/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nvc0/nvc0_video_ppp.c new file mode 100644 index 00000000000..2e99540d6d4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_video_ppp.c @@ -0,0 +1,145 @@ +/* + * Copyright 2011-2013 Maarten Lankhorst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_video.h"
+
+/*
+ * Emit the PPP setup shared by all codecs on dec->pushbuf[2].
+ *
+ * References the two plane buffers of target plus dec->ref_bo, then pushes
+ * one 10-word method at 0x700: output stride / low700, input stride and
+ * picture size (all in macroblock units as returned by mb()), four input
+ * addresses derived from nvc0_video_addr() and the ycbcr offsets, and two
+ * output addresses for each of the target's two resources.
+ *
+ * low700 is OR'ed into the low bits of the first word; callers pass a
+ * codec-specific value.
+ */
+static void
+nvc0_decoder_setup_ppp(struct nvc0_decoder *dec, struct nvc0_video_buffer *target, uint32_t low700) {
+   struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+   uint32_t stride_in = mb(dec->base.width);
+   uint32_t stride_out = mb(target->resources[0]->width0);
+   uint32_t dec_h = mb(dec->base.height);
+   uint32_t dec_w = mb(dec->base.width);
+   uint64_t in_addr;
+   uint32_t y2, cbcr, cbcr2, i;
+   struct nouveau_pushbuf_refn bo_refs[] = {
+      { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+      /* Tested with #if, like every other NVC0_DEBUG_FENCE check in this
+       * file, so a macro defined as 0 does not pull in the fence bo. */
+#if NVC0_DEBUG_FENCE
+      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+   };
+   unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+   /* The first two slots are the target's plane buffers. */
+   for (i = 0; i < 2; ++i) {
+      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+      bo_refs[i].bo = mt->base.bo;
+   }
+
+   nouveau_pushbuf_refn(push, bo_refs, num_refs);
+   nvc0_decoder_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);
+
+   BEGIN_NVC0(push, SUBC_PPP(0x700), 10);
+   in_addr = nvc0_video_addr(dec, target) >> 8;
+
+   PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
+   PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
+   assert(dec_w == stride_in);
+
+   /* Input: */
+   PUSH_DATA (push, in_addr); // 708
+   PUSH_DATA (push, in_addr + y2); // 70c
+   PUSH_DATA (push, in_addr + cbcr); // 710
+   PUSH_DATA (push, in_addr + cbcr2); // 714
+   assert(target->resources[0]->width0 >= 16 * dec_w);
+   assert(target->resources[0]->height0 >= dec->base.height/2);
+
+   /* Output: two addresses per plane, the second half a layer further on. */
+   for (i = 0; i < 2; ++i) {
+      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+
+      PUSH_DATA (push, mt->base.address >> 8);
+      PUSH_DATA (push, (mt->base.address + mt->total_size/2/mt->base.base.array_size) >> 8);
+      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+   }
+}
+
+/*
+ * VC-1 variant of the PPP setup: runs the common setup with low700 = 0x1412
+ * and additionally pushes desc->pquant << 11 to method 0x400.
+ *
+ * Asserts that deblocking is disabled and that the decoder dimensions are
+ * multiples of 16. Returns the ppp caps value (0x10).
+ */
+static uint32_t
+nvc0_decoder_vc1_ppp(struct nvc0_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nvc0_video_buffer *target) {
+   struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+   nvc0_decoder_setup_ppp(dec, target, 0x1412);
+   assert(!desc->deblockEnable);
+   assert(!(dec->base.width & 0xf));
+   assert(!(dec->base.height & 0xf));
+
+   BEGIN_NVC0(push, SUBC_PPP(0x400), 1);
+   PUSH_DATA (push, desc->pquant << 11);
+
+   // 728 = wtf?
+   return 0x10;
+}
+
+/*
+ * Queue the PPP pass for one picture and kick the pushbuf.
+ *
+ * Reserves pushbuf space, emits the codec-specific setup, then writes
+ * comm_seq and the caps word to method 0x734. With NVC0_DEBUG_FENCE enabled
+ * it also emits a fence at fence_bo + 0x20 and spins until
+ * dec->fence_map[8] reaches dec->fence_seq; otherwise it writes 0 to
+ * method 0x300 and kicks without waiting.
+ */
+void
+nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc, struct nvc0_video_buffer *target, unsigned comm_seq) {
+   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
+   struct nouveau_pushbuf *push = dec->pushbuf[2];
+   unsigned ppp_caps = 0x10;
+   unsigned fence_extra = 0;
+
+#if NVC0_DEBUG_FENCE
+   fence_extra = 4;
+#endif
+
+   PUSH_SPACE(push, 11 + (codec == PIPE_VIDEO_CODEC_VC1 ? 2 : 0) + 3 + fence_extra + 2);
+
+   switch (codec) {
+   case PIPE_VIDEO_CODEC_MPEG12: {
+      unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;
+      nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
+      break;
+   }
+   case PIPE_VIDEO_CODEC_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break;
+   case PIPE_VIDEO_CODEC_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break;
+   case PIPE_VIDEO_CODEC_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break;
+   default: assert(0);
+   }
+   BEGIN_NVC0(push, SUBC_PPP(0x734), 2);
+   PUSH_DATA (push, comm_seq);
+   PUSH_DATA (push, ppp_caps);
+
+#if NVC0_DEBUG_FENCE
+   BEGIN_NVC0(push, SUBC_PPP(0x240), 3);
+   PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
+   PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
+   PUSH_DATA (push, dec->fence_seq);
+
+   BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+   PUSH_DATA (push, 1);
+   PUSH_KICK (push);
+
+   {
+      unsigned spin = 0;
+
+      do {
+         usleep(100);
+         if ((spin++ & 0xff) == 0xff)
+            debug_printf("ppp%u: %u\n", dec->fence_seq, dec->fence_map[8]);
+      } while (dec->fence_seq > dec->fence_map[8]);
+   }
+#else
+   BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+   PUSH_DATA (push, 0);
+   PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nvc0/nvc0_video_vp.c
new file mode 100644
index 00000000000..84af0d68be9
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_video_vp.c
@@ -0,0 +1,667 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice
shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nvc0_video.h"
+#include <sys/mman.h>
+
+/* MPEG-1/2 picture parameters as copied into the VP picparm area. */
+struct mpeg12_picparm_vp {
+   uint16_t width; // 00 in mb units
+   uint16_t height; // 02 in mb units
+
+   uint32_t unk04; // 04 stride for Y?
+   uint32_t unk08; // 08 stride for CbCr?
+
+   uint32_t ofs[6]; // 0c..20 ofs
+   uint32_t bucket_size; // 24
+   uint32_t inter_ring_data_size; // 28
+   uint16_t unk2c; // 2c
+   uint16_t alternate_scan; // 2e
+   uint16_t unk30; // 30 not seen set yet
+   uint16_t picture_structure; // 32
+   uint16_t pad2[3];
+   uint16_t unk3a; // 3a set on I frame?
+
+   uint32_t f_code[4]; // 3c
+   uint32_t picture_coding_type; // 4c
+   uint32_t intra_dc_precision; // 50
+   uint32_t q_scale_type; // 54
+   uint32_t top_field_first; // 58
+   uint32_t full_pel_forward_vector; // 5c
+   uint32_t full_pel_backward_vector; // 60
+   uint8_t intra_quantizer_matrix[0x40]; // 64
+   uint8_t non_intra_quantizer_matrix[0x40]; // a4
+};
+
+/* MPEG-4 part 2 picture parameters as copied into the VP picparm area. */
+struct mpeg4_picparm_vp {
+   uint32_t width; // 00 in normal units
+   uint32_t height; // 04 in normal units
+   uint32_t unk08; // stride 1
+   uint32_t unk0c; // stride 2
+   uint32_t ofs[6]; // 10..24 ofs
+   uint32_t bucket_size; // 28
+   uint32_t pad1; // 2c, pad
+   uint32_t pad2; // 30
+   uint32_t inter_ring_data_size; // 34
+
+   uint32_t trd[2]; // 38, 3c
+   uint32_t trb[2]; // 40, 44
+   uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
+   uint16_t f_code_fw; // 4c
+   uint16_t f_code_bw; // 4e
+   uint8_t interlaced; // 50
+
+   uint8_t quant_type; // bool, written to 528
+   uint8_t quarter_sample; // bool, written to 548
+   uint8_t short_video_header; // bool, negated written to 528 shifted by 1
+   uint8_t u54; // bool, written to 0x740
+   uint8_t vop_coding_type; // 55
+   uint8_t rounding_control; // 56
+   uint8_t alternate_vertical_scan_flag; // 57 bool
+   uint8_t top_field_first; // bool, written to vuc
+
+   uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
+   uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY?
+
+   uint32_t intra[0x10]; // 9c
+   uint32_t non_intra[0x10]; // bc
+   // udc..uff pad?
+};
+
+// Full version, with data pumped from BSP
+struct vc1_picparm_vp {
+   uint32_t bucket_size; // 00
+   uint32_t pad; // 04
+
+   uint32_t inter_ring_data_size; // 08
+   uint32_t unk0c; // stride 1
+   uint32_t unk10; // stride 2
+   uint32_t ofs[6]; // 14..28 ofs
+
+   uint16_t width; // 2c
+   uint16_t height; // 2e
+
+   uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
+   uint8_t loopfilter; // 31 written into vuc
+   uint8_t fastuvmc; // 32, written into vuc
+   uint8_t dquant; // 33
+
+   uint8_t overlap; // 34
+   uint8_t quantizer; // 35
+   uint8_t u36; // 36, bool
+   uint8_t pad2; // 37, to align to 0x38
+};
+
+/* H.264 picture parameters as copied into the VP picparm area. */
+struct h264_picparm_vp { // 700..a00
+   uint16_t width, height;
+   uint32_t stride1, stride2; // 04 08
+   uint32_t ofs[6]; // 0c..24 in-image offset
+
+   uint32_t u24; // nfi ac8 ?
+   uint32_t bucket_size; // 28 bucket size
+   uint32_t inter_ring_data_size; // 2c
+
+   unsigned f0 : 1; // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something?
+   unsigned f1 : 1; // 1 0x02: into vuc ofs 56
+   unsigned weighted_pred_flag : 1; // 2 0x04
+   unsigned f3 : 1; // 3 0x08: into vuc ofs 68
+   unsigned is_reference : 1; // 4
+   unsigned interlace : 1; // 5 field_pic_flag
+   unsigned bottom_field_flag : 1; // 6
+   unsigned f7 : 1; // 7 0x80: nfi yet
+
+   signed log2_max_frame_num_minus4 : 4; // 31 0..3
+   unsigned u31_45 : 2; // 31 4..5
+   unsigned pic_order_cnt_type : 2; // 31 6..7
+   signed pic_init_qp_minus26 : 6; // 32 0..5
+   signed chroma_qp_index_offset : 5; // 32 6..10
+   signed second_chroma_qp_index_offset : 5; // 32 11..15
+
+   unsigned weighted_bipred_idc : 2; // 34 0..1
+   unsigned fifo_dec_index : 7; // 34 2..8
+   unsigned tmp_idx : 5; // 34 9..13
+   unsigned frame_number : 16; // 34 14..29
+   unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
+   unsigned u34_3131 : 1; // 34 31..31 pad?
+
+   uint32_t field_order_cnt[2]; // 38, 3c
+
+   struct { // 40
+      // 0x00223102
+      // nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved..
+      unsigned fifo_idx : 7; // 00 0..6
+      unsigned tmp_idx : 5; // 00 7..11
+      unsigned unk12 : 1; // 00 12 not seen yet, but set, maybe top_is_reference
+      unsigned unk13 : 1; // 00 13 not seen yet, but set, maybe bottom_is_reference?
+      unsigned unk14 : 1; // 00 14 skipped?
+      unsigned notseenyet : 1; // 00 15 pad?
+      unsigned unk16 : 1; // 00 16
+      unsigned unk17 : 4; // 00 17..20
+      unsigned unk21 : 4; // 00 21..24
+      unsigned pad : 7; // 00 d25..31
+
+      uint32_t field_order_cnt[2]; // 04,08
+      uint32_t frame_idx; // 0c
+   } refs[0x10];
+
+   uint8_t m4x4[6][16]; // 140
+   uint8_t m8x8[2][64]; // 1a0
+   uint32_t u220; // 220 number of extra reorder_list to append?
+   uint8_t u224[0x20]; // 224..244 reorder_list append ?
+   uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
+};
+
+/*
+ * Update the decoder's reference slot table for one picture.
+ *
+ * Every non-NULL entry of refs[] that still owns the slot named by its
+ * valid_ref gets that slot's last_used set to seq. Unless target is itself
+ * one of the references, target is then given a slot: the first slot with
+ * no buffer and last_used < seq, otherwise the slot with the smallest
+ * last_used among those not used at seq. The chosen slot's decoded_top /
+ * decoded_bottom flags are cleared and target->valid_ref is set to it.
+ */
+static void
+nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffer *refs[16], unsigned seq, struct nvc0_video_buffer *target)
+{
+   unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG4_AVC;
+   unsigned i, idx, empty_spot = dec->base.max_references + 1;
+   for (i = 0; i < dec->base.max_references; ++i) {
+      if (!refs[i])
+         continue;
+
+      idx = refs[i]->valid_ref;
+      //debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx);
+      assert(target != refs[i] ||
+             (h264 && empty_spot &&
+              (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
+      /* Target already owns a slot; empty_spot == 0 means "do not allocate". */
+      if (target == refs[i])
+         empty_spot = 0;
+      assert(!h264 ||
+             dec->refs[idx].last_used == seq - 1);
+
+      if (dec->refs[idx].vidbuf != refs[i]) {
+         debug_printf("%p is not a real ref\n", refs[i]);
+         // FIXME: Maybe do m2mf copy here if a application really depends on it?
+         continue;
+      }
+
+      assert(dec->refs[idx].vidbuf == refs[i]);
+      dec->refs[idx].last_used = seq;
+   }
+   if (!empty_spot)
+      return;
+
+   /* Try to find a real empty spot first, there should be one..
+    */
+   for (i = 0; i < dec->base.max_references + 1; ++i) {
+      if (dec->refs[i].last_used < seq) {
+         if (!dec->refs[i].vidbuf) {
+            empty_spot = i;
+            break;
+         }
+         /* Keep the current candidate if it is older than slot i. */
+         if (empty_spot < dec->base.max_references+1 &&
+             dec->refs[empty_spot].last_used < dec->refs[i].last_used)
+            continue;
+         empty_spot = i;
+      }
+   }
+   assert(empty_spot < dec->base.max_references+1);
+   dec->refs[empty_spot].last_used = seq;
+//   debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i);
+   dec->refs[empty_spot].vidbuf = target;
+   dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0;
+   target->valid_ref = empty_spot;
+}
+
+/* Release the reference slot recorded in target->valid_ref. */
+static void
+nvc0_decoder_kick_ref(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
+{
+   dec->refs[target->valid_ref].vidbuf = NULL;
+   dec->refs[target->valid_ref].last_used = 0;
+//   debug_printf("Unreffed %p\n", target);
+}
+
+/*
+ * Build the MPEG-1/2 VP picture parameters from desc and copy them to map.
+ *
+ * Stores desc->ref[0] and desc->ref[1] in refs[] (ref[1] lands in refs[0]
+ * when ref[0] is NULL) and sets *is_ref when picture_coding_type <= 2.
+ * Returns the VP caps word; bit 0 is set for every profile except MPEG1.
+ */
+static uint32_t
+nvc0_decoder_fill_picparm_mpeg12_vp(struct nvc0_decoder *dec,
+                                    struct pipe_mpeg12_picture_desc *desc,
+                                    struct nvc0_video_buffer *refs[16],
+                                    unsigned *is_ref,
+                                    char *map)
+{
+   struct mpeg12_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
+   uint32_t i, ret = 0x01010, ring; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
+   assert(!(dec->base.width & 0xf));
+   *is_ref = desc->picture_coding_type <= 2;
+
+   if (dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
+      pic_vp->picture_structure = 3;
+   else
+      pic_vp->picture_structure = desc->picture_structure;
+
+   assert(desc->picture_structure != 4);
+   if (desc->picture_structure == 4) // Untested, but should work
+      ret |= 0x100;
+   pic_vp->width = mb(dec->base.width);
+   pic_vp->height = mb(dec->base.height);
+   pic_vp->unk08 = pic_vp->unk04 = (dec->base.width+0xf)&~0xf; // Stride
+
+   nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
+   pic_vp->ofs[5] = pic_vp->ofs[3];
+   pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
+   nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
+
+   pic_vp->alternate_scan = desc->alternate_scan;
+   pic_vp->pad2[0] = pic_vp->pad2[1] = pic_vp->pad2[2] = 0;
+   pic_vp->unk30 = desc->picture_structure < 3 && (desc->picture_structure == 2 - desc->top_field_first);
+   pic_vp->unk3a = (desc->picture_coding_type == 1);
+   for (i = 0; i < 4; ++i)
+      pic_vp->f_code[i] = desc->f_code[i/2][i%2] + 1; // FU
+   pic_vp->picture_coding_type = desc->picture_coding_type;
+   pic_vp->intra_dc_precision = desc->intra_dc_precision;
+   pic_vp->q_scale_type = desc->q_scale_type;
+   pic_vp->top_field_first = desc->top_field_first;
+   pic_vp->full_pel_forward_vector = desc->full_pel_forward_vector;
+   pic_vp->full_pel_backward_vector = desc->full_pel_backward_vector;
+   memcpy(pic_vp->intra_quantizer_matrix, desc->intra_matrix, 0x40);
+   memcpy(pic_vp->non_intra_quantizer_matrix, desc->non_intra_matrix, 0x40);
+   memcpy(map, pic_vp, sizeof(*pic_vp));
+   refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
+   refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
+   return ret | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1);
+}
+
+/*
+ * Build the MPEG-4 VP picture parameters from desc and copy them to map.
+ *
+ * Stores desc->ref[0] and desc->ref[1] in refs[] (ref[1] lands in refs[0]
+ * when ref[0] is NULL) and sets *is_ref when vop_coding_type <= 1.
+ * Returns the VP caps word (0x01014).
+ */
+static uint32_t
+nvc0_decoder_fill_picparm_mpeg4_vp(struct nvc0_decoder *dec,
+                                   struct pipe_mpeg4_picture_desc *desc,
+                                   struct nvc0_video_buffer *refs[16],
+                                   unsigned *is_ref,
+                                   char *map)
+{
+   struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
+   uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
+   assert(!(dec->base.width & 0xf));
+   *is_ref = desc->vop_coding_type <= 1;
+
+   pic_vp->width = dec->base.width;
+   pic_vp->height = mb(dec->base.height)<<4;
+   pic_vp->unk0c = pic_vp->unk08 = mb(dec->base.width)<<4; // Stride
+
+   nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
+   pic_vp->ofs[5] = pic_vp->ofs[3];
+   pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
+   pic_vp->pad1 = pic_vp->pad2 = 0;
+   nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
+
+   pic_vp->trd[0] = desc->trd[0];
+   pic_vp->trd[1] = desc->trd[1];
+   pic_vp->trb[0] = desc->trb[0];
+   pic_vp->trb[1] = desc->trb[1];
+   pic_vp->u48 = 0; // Codec?
+   pic_vp->pad1 = pic_vp->pad2 = 0;
+   pic_vp->f_code_fw = desc->vop_fcode_forward;
+   pic_vp->f_code_bw = desc->vop_fcode_backward;
+   pic_vp->interlaced = desc->interlaced;
+   pic_vp->quant_type = desc->quant_type;
+   pic_vp->quarter_sample = desc->quarter_sample;
+   pic_vp->short_video_header = desc->short_video_header;
+   pic_vp->u54 = 0;
+   pic_vp->vop_coding_type = desc->vop_coding_type;
+   pic_vp->rounding_control = desc->rounding_control;
+   pic_vp->alternate_vertical_scan_flag = desc->alternate_vertical_scan_flag;
+   pic_vp->top_field_first = desc->top_field_first;
+
+   memcpy(pic_vp->intra, desc->intra_matrix, 0x40);
+   memcpy(pic_vp->non_intra, desc->non_intra_matrix, 0x40);
+   memcpy(map, pic_vp, sizeof(*pic_vp));
+   refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
+   refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
+   return ret;
+}
+
+/*
+ * Build the H.264 VP picture parameters from d and store them at map.
+ *
+ * Copies d->ref[] entries into refs[] up to the first NULL (at most
+ * d->num_ref_frames) and fills the matching h->refs[] records from the
+ * decoder's slot table. Sets *is_ref from d->is_reference and records
+ * d->frame_num in dec->last_frame_num. tmp_idx is left 0 here; it is
+ * filled in by nvc0_decoder_fill_picparm_h264_vp_refs().
+ * Returns the VP caps word (0x1113).
+ */
+static uint32_t
+nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
+                                  const struct pipe_h264_picture_desc *d,
+                                  struct nvc0_video_buffer *refs[16],
+                                  unsigned *is_ref,
+                                  char *map)
+{
+   struct h264_picparm_vp stub_h = {}, *h = &stub_h;
+   unsigned ring, i, j = 0;
+   assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
+   *is_ref = d->is_reference;
+   assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num);
+   dec->last_frame_num = d->frame_num;
+
+   h->width = mb(dec->base.width);
+   h->height = mb(dec->base.height);
+   h->stride1 = h->stride2 = mb(dec->base.width)*16;
+   nvc0_decoder_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]);
+   h->ofs[5] = h->ofs[3];
+   h->ofs[0] = h->ofs[2] = 0;
+   h->u24 = dec->tmp_stride >> 8;
+   assert(h->u24);
+   nvc0_decoder_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size);
+
+   h->u220 = 0;
+   h->f0 = d->mb_adaptive_frame_field_flag;
+   h->f1 = d->direct_8x8_inference_flag;
+   h->weighted_pred_flag = d->weighted_pred_flag;
+   h->f3 = d->constrained_intra_pred_flag;
+   h->is_reference = d->is_reference;
+   h->interlace = d->field_pic_flag;
+   h->bottom_field_flag = d->bottom_field_flag;
+   h->f7 = 0; // TODO: figure out when set..
+   h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4;
+   h->u31_45 = 1;
+
+   h->pic_order_cnt_type = d->pic_order_cnt_type;
+   h->pic_init_qp_minus26 = d->pic_init_qp_minus26;
+   h->chroma_qp_index_offset = d->chroma_qp_index_offset;
+   h->second_chroma_qp_index_offset = d->second_chroma_qp_index_offset;
+   h->weighted_bipred_idc = d->weighted_bipred_idc;
+   h->tmp_idx = 0; // set in h264_vp_refs below
+   h->fifo_dec_index = 0; // always set to 0 to be fifo compatible with other codecs
+   h->frame_number = d->frame_num;
+   h->u34_3030 = h->u34_3131 = 0;
+   h->field_order_cnt[0] = d->field_order_cnt[0];
+   h->field_order_cnt[1] = d->field_order_cnt[1];
+   memset(h->refs, 0, sizeof(h->refs));
+   /* One copy covers both m4x4 and the m8x8 array that follows it. */
+   memcpy(h->m4x4, d->scaling_lists_4x4, sizeof(h->m4x4) + sizeof(h->m8x8));
+   h->u220 = 0;
+   for (i = 0; i < d->num_ref_frames; ++i) {
+      if (!d->ref[i])
+         break;
+      refs[j] = (struct nvc0_video_buffer *)d->ref[i];
+      h->refs[j].fifo_idx = j + 1;
+      h->refs[j].tmp_idx = refs[j]->valid_ref;
+      h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0];
+      h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1];
+      h->refs[j].frame_idx = d->frame_num_list[i];
+      if (!dec->refs[refs[j]->valid_ref].field_pic_flag) {
+         h->refs[j].unk12 = d->top_is_reference[i];
+         h->refs[j].unk13 = d->bottom_is_reference[i];
+      }
+      h->refs[j].unk14 = 0;
+      h->refs[j].notseenyet = 0;
+      h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag;
+      h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top &&
+                         d->top_is_reference[i];
+      h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom &&
+                         d->bottom_is_reference[i];
+      h->refs[j].pad = 0;
+      assert(!d->is_long_term[i]);
+      j++;
+   }
+   for (; i < 16; ++i)
+      assert(!d->ref[i]);
+   assert(d->num_ref_frames <= dec->base.max_references);
+
+   /* NOTE(review): i is already 16 after the loop above (its header runs
+    * even when assert() is compiled out), so this loop only executes if
+    * num_ref_frames exceeds 16 - confirm whether it is still wanted. */
+   for (; i < d->num_ref_frames; ++i)
+      h->refs[j].unk16 = d->field_pic_flag;
+   *(struct h264_picparm_vp *)map = *h;
+
+   return 0x1113;
+}
+
+/*
+ * Second H.264 pass, run once target owns a reference slot: writes the
+ * slot index into the picparm's tmp_idx and records in the slot table the
+ * field_pic_flag and which field(s) of target this picture decodes.
+ */
+static void
+nvc0_decoder_fill_picparm_h264_vp_refs(struct nvc0_decoder *dec,
+                                       struct pipe_h264_picture_desc *d,
+                                       struct nvc0_video_buffer *refs[16],
+                                       struct nvc0_video_buffer *target,
+                                       char *map)
+{
+   struct h264_picparm_vp *h = (struct h264_picparm_vp *)map;
+   assert(dec->refs[target->valid_ref].vidbuf == target);
+//    debug_printf("Target: %p\n", target);
+
+   h->tmp_idx = target->valid_ref;
+   dec->refs[target->valid_ref].field_pic_flag = d->field_pic_flag;
+   if (!d->field_pic_flag || d->bottom_field_flag)
+      dec->refs[target->valid_ref].decoded_bottom = 1;
+   if (!d->field_pic_flag || !d->bottom_field_flag)
+      dec->refs[target->valid_ref].decoded_top = 1;
+}
+
+/*
+ * Fill the VC-1 VP picture parameters in place at map (unlike the other
+ * codecs no zeroed stub is copied; only the fields assigned below are
+ * written).
+ *
+ * Stores d->ref[0] and d->ref[1] in refs[] (ref[1] lands in refs[0] when
+ * ref[0] is NULL) and sets *is_ref when picture_type <= 1.
+ * Returns the VP caps word (0x12).
+ */
+static uint32_t
+nvc0_decoder_fill_picparm_vc1_vp(struct nvc0_decoder *dec,
+                                 struct pipe_vc1_picture_desc *d,
+                                 struct nvc0_video_buffer *refs[16],
+                                 unsigned *is_ref,
+                                 char *map)
+{
+   struct vc1_picparm_vp *vc = (struct vc1_picparm_vp *)map;
+   unsigned ring;
+   assert(dec->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE);
+   *is_ref = d->picture_type <= 1;
+
+   nvc0_decoder_ycbcr_offsets(dec, &vc->ofs[1], &vc->ofs[3], &vc->ofs[4]);
+   vc->ofs[5] = vc->ofs[3];
+   vc->ofs[0] = vc->ofs[2] = 0;
+   vc->width = dec->base.width;
+   vc->height = mb(dec->base.height)<<4;
+   vc->unk0c = vc->unk10 = mb(dec->base.width)<<4; // Stride
+   vc->pad = vc->pad2 = 0;
+   nvc0_decoder_inter_sizes(dec, 1, &ring, &vc->bucket_size, &vc->inter_ring_data_size);
+   vc->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE;
+   vc->loopfilter = d->loopfilter;
+   vc->fastuvmc = d->fastuvmc;
+   vc->dquant = d->dquant;
+   vc->overlap = d->overlap;
+   vc->quantizer = d->quantizer;
+   vc->u36 = 0; // ? No idea what this one is..
+   refs[0] = (struct nvc0_video_buffer *)d->ref[0];
+   refs[!!refs[0]] = (struct nvc0_video_buffer *)d->ref[1];
+   return 0x12;
+}
+
+#if NVC0_DEBUG_FENCE
+/*
+ * Debug helper: print the VP status/stage words from comm and, when the
+ * stage's low byte is not 0xff, map inter_bo and hex-dump its first
+ * comm->byte_ofs + slice_size bytes before asserting.
+ */
+static void dump_comm_vp(struct nvc0_decoder *dec, struct comm *comm, u32 comm_seq,
+                         struct nouveau_bo *inter_bo, unsigned slice_size)
+{
+   unsigned i, idx = comm->pvp_cur_index & 0xf;
+   debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+#if 0
+   debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+   debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+
+   for (i = 0; i != comm->irq_index; ++i)
+      debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+   for (i = 0; i != comm->parse_endpos_index; ++i)
+      debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+#endif
+   debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+   if (comm->status_vp[idx] == 1)
+      return;
+
+   if ((comm->pvp_stage & 0xff) != 0xff) {
+      unsigned *map;
+      /* Map outside assert() so the call survives NDEBUG builds. */
+      int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
+      assert(ret >= 0);
+      if (ret >= 0) {
+         map = inter_bo->map;
+         for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+            debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+         }
+         munmap(inter_bo->map, inter_bo->size);
+         inter_bo->map = NULL;
+      }
+   }
+   assert((comm->pvp_stage & 0xff) == 0xff);
+}
+#endif
+
+/*
+ * Fill the VP picture parameters for this picture and update reference
+ * tracking.
+ *
+ * The picparm is written at VP_OFFSET inside the bsp buffer selected by
+ * comm_seq. *caps receives the codec's VP caps word, *is_ref whether the
+ * picture is a reference, and refs[] the reference buffers. For H.264 the
+ * target's slot index is patched into the picparm after slot assignment.
+ */
+void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, union pipe_desc desc,
+                          struct nvc0_video_buffer *target, unsigned comm_seq,
+                          unsigned *caps, unsigned *is_ref,
+                          struct nvc0_video_buffer *refs[16])
+{
+   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
+   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
+   char *vp = bsp_bo->map + VP_OFFSET;
+
+   switch (codec){
+   case PIPE_VIDEO_CODEC_MPEG12:
+      *caps = nvc0_decoder_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp);
+      nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
+      return;
+   case PIPE_VIDEO_CODEC_MPEG4:
+      *caps = nvc0_decoder_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp);
+      nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
+      return;
+   case PIPE_VIDEO_CODEC_VC1: {
+      *caps = nvc0_decoder_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp);
+      nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
+      return;
+   }
+   case PIPE_VIDEO_CODEC_MPEG4_AVC: {
+      *caps = nvc0_decoder_fill_picparm_h264_vp(dec, desc.h264, refs, is_ref, vp);
+      nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);
+      nvc0_decoder_fill_picparm_h264_vp_refs(dec, desc.h264, refs, target, vp);
+      return;
+   }
+   default: assert(0); return;
+   }
+}
+
+/*
+ * Emit the VP commands for one picture on dec->pushbuf[1] and kick.
+ *
+ * Resolves an address for the target and each reference, releases the
+ * target's slot when the picture is not a reference, then pushes the caps,
+ * sequence number, picparm / inter / comm / firmware addresses and the
+ * picture addresses. With NVC0_DEBUG_FENCE enabled it also emits a fence
+ * at fence_bo + 0x10 and spins until dec->fence_map[4] reaches
+ * dec->fence_seq.
+ */
+void
+nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc,
+                struct nvc0_video_buffer *target, unsigned comm_seq,
+                unsigned caps, unsigned is_ref,
+                struct nvc0_video_buffer *refs[16])
+{
+   struct nouveau_pushbuf *push = dec->pushbuf[1];
+   uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
+   uint32_t slice_size, bucket_size, ring_size, i;
+   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
+   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
+   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+   u32 fence_extra = 0, codec_extra = 0;
+   struct nouveau_pushbuf_refn bo_refs[] = {
+      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+      { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+      /* Tested with #if, like the other NVC0_DEBUG_FENCE checks in this
+       * function, so a macro defined as 0 does not pull in the fence bo. */
+#if NVC0_DEBUG_FENCE
+      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+      { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+   };
+   /* fw_bo is the last entry, so it is dropped when no firmware bo exists. */
+   int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
+
+#if NVC0_DEBUG_FENCE
+   fence_extra = 4;
+#endif
+
+   if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
+      nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+      codec_extra += 2;
+   } else
+      nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+
+   if (dec->base.max_references > 2)
+      codec_extra += 1 + (dec->base.max_references - 2);
+
+   pic_addr[16] = nvc0_video_addr(dec, target) >> 8;
+   last_addr = null_addr = nvc0_video_addr(dec, NULL) >> 8;
+
+   /* NULL entries repeat the last valid reference address; entries that no
+    * longer own their slot get the address returned for a NULL buffer. */
+   for (i = 0; i < dec->base.max_references; ++i) {
+      if (!refs[i])
+         pic_addr[i] = last_addr;
+      else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
+         last_addr = pic_addr[i] = nvc0_video_addr(dec, refs[i]) >> 8;
+      else
+         pic_addr[i] = null_addr;
+   }
+   if (!is_ref)
+      nvc0_decoder_kick_ref(dec, target);
+
+   PUSH_SPACE(push, 8 + 3 * (codec != PIPE_VIDEO_CODEC_MPEG12) +
+              6 + codec_extra + fence_extra + 2);
+
+   nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+   bsp_addr = bsp_bo->offset >> 8;
+#if NVC0_DEBUG_FENCE
+   comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
+#else
+   comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+   inter_addr = inter_bo->offset >> 8;
+   if (dec->fw_bo)
+      ucode_addr = dec->fw_bo->offset >> 8;
+   else
+      ucode_addr = 0;
+
+   BEGIN_NVC0(push, SUBC_VP(0x700), 7);
+   PUSH_DATA (push, caps); // 700
+   PUSH_DATA (push, comm_seq); // 704
+   PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
+   PUSH_DATA (push, dec->fw_sizes); // 70c
+   PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
+   PUSH_DATA (push, inter_addr); // 714 inter_parm
+   PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
+
+   if (bucket_size) {
+      uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
+
+      BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
+      PUSH_DATA (push, tmpimg_addr >> 8); // 71c
+      PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
+   }
+
+   BEGIN_NVC0(push, SUBC_VP(0x724), 5);
+   PUSH_DATA (push, comm_addr); // 724
+   PUSH_DATA (push, ucode_addr); // 728
+   PUSH_DATA (push, pic_addr[16]); // 734
+   PUSH_DATA (push, pic_addr[0]); // 72c
+   PUSH_DATA (push, pic_addr[1]); // 730
+
+   if (dec->base.max_references > 2) {
+      int i;
+
+      BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
+      for (i = 2; i < dec->base.max_references; ++i) {
+         assert(0x400 + (i - 2) * 4 < 0x438);
+         PUSH_DATA (push, pic_addr[i]);
+      }
+   }
+
+   if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
+      BEGIN_NVC0(push, SUBC_VP(0x438), 1);
+      PUSH_DATA (push, desc.h264->slice_count);
+   }
+
+   //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
+
+#if NVC0_DEBUG_FENCE
+   BEGIN_NVC0(push, SUBC_VP(0x240), 3);
+   PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
+   PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
+   PUSH_DATA (push, dec->fence_seq);
+
+   BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 1);
+   PUSH_KICK(push);
+
+   {
+      unsigned spin = 0;
+      do {
+         usleep(100);
+         if ((spin++ & 0xff) == 0xff) {
+            debug_printf("vp%u: %u\n", dec->fence_seq, dec->fence_map[4]);
+            dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+         }
+      } while (dec->fence_seq > dec->fence_map[4]);
+   }
+   dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+#else
+   BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+   PUSH_DATA (push, 0);
+   PUSH_KICK (push);
+#endif
+}
-- 
2.30.2