X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv30%2Fnv30_transfer.c;h=37df721fa675fe9dc6a6c0b1412d381e15578f0a;hb=725671a83a67cc8cf16c0913f6e1835fb272c2fb;hp=2255a02caedb059e5add52eb7f44413b64b073be;hpb=aa02683e45f1eaf61bba2ba7eeda7686efeed2ca;p=mesa.git diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2255a02caed..37df721fa67 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -1,174 +1,754 @@ -#include -#include -#include -#include -#include -#include -#include +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + * + */ + +#define XFER_ARGS \ + struct nv30_context *nv30, enum nv30_transfer_filter filter, \ + struct nv30_rect *src, struct nv30_rect *dst + +#include "util/u_math.h" + +#include "nouveau/nv_object.xml.h" +#include "nouveau/nv_m2mf.xml.h" +#include "nv01_2d.xml.h" +#include "nv30-40_3d.xml.h" + #include "nv30_context.h" -#include "nv30_screen.h" -#include "nv30_state.h" +#include "nv30_transfer.h" + +/* Various helper functions to transfer different types of data in a number + * of different ways. + */ + +static INLINE boolean +nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst) +{ + if (src->x1 - src->x0 != dst->x1 - dst->x0) + return TRUE; + if (src->y1 - src->y0 != dst->y1 - dst->y0) + return TRUE; + return FALSE; +} + +static INLINE boolean +nv30_transfer_blit(XFER_ARGS) +{ + if (nv30->screen->eng3d->oclass < NV40_3D_CLASS) + return FALSE; + if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1) + return FALSE; + if (dst->w < 2 || dst->h < 2) + return FALSE; + if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch)) + return FALSE; + if (src->cpp > 4) + return FALSE; + return TRUE; +} + +static INLINE struct nouveau_heap * +nv30_transfer_rect_vertprog(struct nv30_context *nv30) +{ + struct nouveau_heap *heap = nv30->screen->vp_exec_heap; + struct nouveau_heap *vp; + + vp = nv30->blit_vp; + if (!vp) { + if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) { + while (heap->next && heap->size < 2) { + struct nouveau_heap **evict = heap->next->priv; + nouveau_heap_free(evict); + } + + if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) + return NULL; + } + + vp = nv30->blit_vp; + if (vp) { + struct nouveau_pushbuf *push = nv30->base.pushbuf; + + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); + PUSH_DATA (push, vp->start); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */ + PUSH_DATA (push, 0x0040000d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff80); + BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); + PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */ + PUSH_DATA (push, 0x0040080d); + PUSH_DATA (push, 0x8106c083); + PUSH_DATA (push, 0x6041ff9d); + } + } + + return vp; +} + + +static INLINE struct nv04_resource * +nv30_transfer_rect_fragprog(struct nv30_context *nv30) +{ + struct nv04_resource *fp = nv04_resource(nv30->blit_fp); + struct pipe_context *pipe = &nv30->base.pipe; + + if (!fp) { + nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4); + if (nv30->blit_fp) { + struct pipe_transfer *transfer; + u32 *map = pipe_buffer_map(pipe, nv30->blit_fp, + PIPE_TRANSFER_WRITE, &transfer); + if (map) { + map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */ + map[1] = 0x1c9dc801; + map[2] = 0x0001c800; + map[3] = 0x3fe1c800; + map[4] = 0x01401e81; /* end; */ + map[5] = 0x1c9dc800; + map[6] = 0x0001c800; + map[7] = 0x0001c800; + pipe_buffer_unmap(pipe, transfer); + } + + fp = nv04_resource(nv30->blit_fp); + nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM); + } + } + + return fp; +} + +static void +nv30_transfer_rect_blit(XFER_ARGS) +{ + struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30); + struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30); + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_pushbuf_refn refs[] = { + { fp->bo, fp->domain | NOUVEAU_BO_RD }, + { src->bo, src->domain | NOUVEAU_BO_RD }, + { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR }, + }; + u32 texfmt, texswz; + u32 format, stride; + + if (nouveau_pushbuf_space(push, 512, 8, 0) || + nouveau_pushbuf_refn (push, refs, sizeof(refs) / sizeof(refs[0]))) + return; + + /* various switches depending on cpp of the transfer */ + switch (dst->cpp) { + case 4: + format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 | + NV30_3D_RT_FORMAT_ZETA_Z24S8; + texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8; + texswz = 0x0000aae4; + break; + case 2: + format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 | + NV30_3D_RT_FORMAT_ZETA_Z16; + texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5; + texswz = 0x0000a9e4; + break; + case 1: + format = NV30_3D_RT_FORMAT_COLOR_B8 | + NV30_3D_RT_FORMAT_ZETA_Z16; + texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8; + texswz = 0x0000aaff; + break; + default: + assert(0); + return; + } + + /* render target */ + if (!dst->pitch) { + format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; + format |= util_logbase2(dst->w) << 16; + format |= util_logbase2(dst->h) << 24; + stride = 64; + } else { + format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; + stride = dst->pitch; + } + + BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2); + PUSH_DATA (push, dst->w << 16); + PUSH_DATA (push, dst->h << 16); + BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5); + PUSH_DATA (push, dst->w << 16); + PUSH_DATA (push, dst->h << 16); + PUSH_DATA (push, format); + PUSH_DATA (push, stride); + PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); + PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0); + + nv30->dirty |= NV30_NEW_FRAMEBUFFER; + + /* viewport state */ + BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + PUSH_DATAf(push, 1.0); + BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2); + PUSH_DATAf(push, 0.0); + PUSH_DATAf(push, 1.0); + + nv30->dirty |= NV30_NEW_VIEWPORT; + + /* blend state */ + BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1); + PUSH_DATA (push, 0x01010101); + + nv30->dirty |= NV30_NEW_BLEND; + + /* depth-stencil-alpha state */ + BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1); + PUSH_DATA (push, 0); + + nv30->dirty |= NV30_NEW_ZSA; + + /* rasterizer state */ + BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1); + PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT); + BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2); + PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL); + PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL); + BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1); + PUSH_DATA (push, 0); + + nv30->state.scissor_off = 0; + nv30->dirty |= NV30_NEW_RASTERIZER; + + /* vertex program */ + BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); + PUSH_DATA (push, vp->start); + BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); + PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */ + PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */ + BEGIN_NV04(push, NV30_3D(ENGINE), 1); + PUSH_DATA (push, 0x00000103); + BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); + PUSH_DATA (push, 0x00000000); + + nv30->dirty |= NV30_NEW_VERTPROG; + nv30->dirty |= NV30_NEW_CLIP; + + /* fragment program */ + BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); + PUSH_RELOC(push, fp->bo, fp->offset, fp->domain | + NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV30_3D_FP_ACTIVE_PROGRAM_DMA0, + NV30_3D_FP_ACTIVE_PROGRAM_DMA1); + BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); + PUSH_DATA (push, 0x02000000); + + nv30->state.fragprog = NULL; + nv30->dirty |= NV30_NEW_FRAGPROG; + + /* texture */ + texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; + texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER; + texfmt |= NV40_3D_TEX_FORMAT_RECT; + texfmt |= 0x00008000; + if (src->d < 2) + texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D; + else + texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D; + if (src->pitch) + texfmt |= NV40_3D_TEX_FORMAT_LINEAR; + + BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8); + PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR, + NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); + PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE | + NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE | + NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE); + PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE); + PUSH_DATA (push, texswz); + switch (filter) { + case BILINEAR: + PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR | + NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000); + break; + default: + PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST | + NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000); + break; + } + PUSH_DATA (push, (src->w << 16) | src->h); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1); + PUSH_DATA (push, 0x00100000 | src->pitch); + BEGIN_NV04(push, SUBC_3D(0x0b40), 1); + PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000); + BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1); + PUSH_DATA (push, 1); + + nv30->fragprog.dirty_samplers |= 1; + nv30->dirty |= NV30_NEW_FRAGTEX; + + /* blit! */ + BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2); + PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0); + PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0); + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); + PUSH_DATAf(push, src->x0); + PUSH_DATAf(push, src->y0); + PUSH_DATAf(push, src->z); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); + PUSH_DATA (push, (dst->y0 << 16) | dst->x0); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); + PUSH_DATAf(push, src->x1); + PUSH_DATAf(push, src->y0); + PUSH_DATAf(push, src->z); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); + PUSH_DATA (push, (dst->y0 << 16) | dst->x1); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); + PUSH_DATAf(push, src->x1); + PUSH_DATAf(push, src->y1); + PUSH_DATAf(push, src->z); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); + PUSH_DATA (push, (dst->y1 << 16) | dst->x1); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); + PUSH_DATAf(push, src->x0); + PUSH_DATAf(push, src->y1); + PUSH_DATAf(push, src->z); + BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); + PUSH_DATA (push, (dst->y1 << 16) | dst->x0); + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); + PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); +} + +static boolean +nv30_transfer_sifm(XFER_ARGS) +{ + if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2) + return FALSE; + + if (src->d > 1 || dst->d > 1) + return FALSE; + + if (dst->offset & 63) + return FALSE; -struct nv30_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; + if (!dst->pitch) { + if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2) + return FALSE; + } else { + if (dst->domain != NOUVEAU_BO_VRAM) + return FALSE; + if (dst->pitch & 63) + return FALSE; + } + + return TRUE; +} static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = pt->target; - template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)pt; - struct nv30_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv30_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; +nv30_transfer_rect_sifm(XFER_ARGS) + +{ + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_pushbuf_refn refs[] = { + { src->bo, src->domain | NOUVEAU_BO_RD }, + { dst->bo, dst->domain | NOUVEAU_BO_WR }, + }; + struct nv04_fifo *fifo = push->channel->data; + unsigned si_fmt, si_arg; + unsigned ss_fmt; + + switch (dst->cpp) { + case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break; + case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break; + default: + ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8; + break; + } + + switch (src->cpp) { + case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break; + case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break; + default: + si_fmt = NV03_SIFM_COLOR_FORMAT_AY8; + break; + } + + if (filter == NEAREST) { + si_arg = NV03_SIFM_FORMAT_ORIGIN_CENTER; + si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE; + } else { + si_arg = NV03_SIFM_FORMAT_ORIGIN_CORNER; + si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR; + } + + if (nouveau_pushbuf_space(push, 32, 6, 0) || + nouveau_pushbuf_refn (push, refs, 2)) + return; + + if (dst->pitch) { + BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2); + PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); + PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); + BEGIN_NV04(push, NV04_SF2D(FORMAT), 4); + PUSH_DATA (push, ss_fmt); + PUSH_DATA (push, dst->pitch << 16 | dst->pitch); + PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV05_SIFM(SURFACE), 1); + PUSH_DATA (push, nv30->screen->surf2d->handle); + } else { + BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1); + PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); + BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2); + PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) | + (util_logbase2(dst->h) << 24)); + PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); + BEGIN_NV04(push, NV05_SIFM(SURFACE), 1); + PUSH_DATA (push, nv30->screen->swzsurf->handle); + } + + BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1); + PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); + BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8); + PUSH_DATA (push, si_fmt); + PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY); + PUSH_DATA (push, ( dst->y0 << 16) | dst->x0); + PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0)); + PUSH_DATA (push, ( dst->y0 << 16) | dst->x0); + PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0)); + PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0)); + PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0)); + BEGIN_NV04(push, NV03_SIFM(SIZE), 4); + PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2)); + PUSH_DATA (push, src->pitch | si_arg); + PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4); +} + +/* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required + * to prevent some odd things from happening, easily reproducible by + * attempting to do conditional rendering that has a M2MF transfer done + * some time before it. 0x1e98 will fail with a DMA_W_PROTECTION (assuming + * that name is still accurate on nv4x) error. + */ + +static boolean +nv30_transfer_m2mf(XFER_ARGS) +{ + if (!src->pitch || !dst->pitch) + return FALSE; + if (nv30_transfer_scaled(src, dst)) + return FALSE; + return TRUE; } static void -nv30_transfer_del(struct pipe_transfer *ptx) +nv30_transfer_rect_m2mf(XFER_ARGS) { - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; + struct nouveau_pushbuf *push = nv30->base.pushbuf; + struct nouveau_pushbuf_refn refs[] = { + { src->bo, src->domain | NOUVEAU_BO_RD }, + { dst->bo, dst->domain | NOUVEAU_BO_WR }, + }; + struct nv04_fifo *fifo = push->channel->data; + unsigned src_offset = src->offset; + unsigned dst_offset = dst->offset; + unsigned w = dst->x1 - dst->x0; + unsigned h = dst->y1 - dst->y0; - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *dst; + src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp); + dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp); - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2); + PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); + PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, - tx->surface, 0, 0, - dst->width, dst->height); + while (h) { + unsigned lines = (h > 2047) ? 2047 : h; - pipe_surface_reference(&dst, NULL); - } + if (nouveau_pushbuf_space(push, 13, 2, 0) || + nouveau_pushbuf_refn (push, refs, 2)) + return; - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); + BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); + PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0); + PUSH_DATA (push, src->pitch); + PUSH_DATA (push, dst->pitch); + PUSH_DATA (push, w * src->cpp); + PUSH_DATA (push, lines); + PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | + NV03_M2MF_FORMAT_OUTPUT_INC_1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); + PUSH_DATA (push, 0x00000000); + + h -= lines; + src_offset += src->pitch * lines; + dst_offset += dst->pitch * lines; + } +} + +static boolean +nv30_transfer_cpu(XFER_ARGS) +{ + if (nv30_transfer_scaled(src, dst)) + return FALSE; + return TRUE; +} + +static char * +linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) +{ + return base + (y * rect->pitch) + (x * rect->cpp); } -static void * -nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +static INLINE unsigned +swizzle2d(unsigned v, unsigned s) { - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); + v = (v | (v << 8)) & 0x00ff00ff; + v = (v | (v << 4)) & 0x0f0f0f0f; + v = (v | (v << 2)) & 0x33333333; + v = (v | (v << 1)) & 0x55555555; + return v << s; +} + +static char * +swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) +{ + unsigned k = util_logbase2(MIN2(rect->w, rect->h)); + unsigned km = (1 << k) - 1; + unsigned nx = rect->w >> k; + unsigned tx = x >> k; + unsigned ty = y >> k; + unsigned m; + + m = swizzle2d(x & km, 0); + m |= swizzle2d(y & km, 1); + m += ((ty * nx) + tx) << k << k; - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + return base + (m * rect->cpp); +} + +static char * +swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) +{ + unsigned w = rect->w >> 1; + unsigned h = rect->h >> 1; + unsigned d = rect->d >> 1; + unsigned i = 0, o; + unsigned v = 0; + + do { + o = i; + if (w) { + v |= (x & 1) << i++; + x >>= 1; + w >>= 1; + } + if (h) { + v |= (y & 1) << i++; + y >>= 1; + h >>= 1; + } + if (d) { + v |= (z & 1) << i++; + z >>= 1; + d >>= 1; + } + } while(o != i); + + return base + (v * rect->cpp); +} + +typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int); + +static INLINE get_ptr_t +get_ptr(struct nv30_rect *rect) +{ + if (rect->pitch) + return linear_ptr; + + if (rect->d <= 1) + return swizzle2d_ptr; + + return swizzle3d_ptr; } static void -nv30_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nv30_transfer_rect_cpu(XFER_ARGS) { - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; + get_ptr_t sp = get_ptr(src); + get_ptr_t dp = get_ptr(dst); + char *srcmap, *dstmap; + int x, y; + + nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client); + nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client); + srcmap = src->bo->map + src->offset; + dstmap = dst->bo->map + dst->offset; - pipe_buffer_unmap(pscreen, mt->buffer); + for (y = 0; y < (dst->y1 - dst->y0); y++) { + for (x = 0; x < (dst->x1 - dst->x0); x++) { + memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z), + sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp); + } + } } void -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen) +nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter, + struct nv30_rect *src, struct nv30_rect *dst) { - pscreen->get_tex_transfer = nv30_transfer_new; - pscreen->tex_transfer_destroy = nv30_transfer_del; - pscreen->transfer_map = nv30_transfer_map; - pscreen->transfer_unmap = nv30_transfer_unmap; + static const struct { + char *name; + boolean (*possible)(XFER_ARGS); + void (*execute)(XFER_ARGS); + } *method, methods[] = { + { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf }, + { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm }, + { "blit", nv30_transfer_blit, nv30_transfer_rect_blit }, + { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu }, + {} + }; + + method = methods - 1; + while ((++method)->possible) { + if (method->possible(nv30, filter, src, dst)) { + method->execute(nv30, filter, src, dst); + return; + } + } + + assert(0); +} + +void +nv30_transfer_push_data(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned offset, unsigned domain, + unsigned size, void *data) +{ + /* use ifc, or scratch + copy_data? */ + fprintf(stderr, "nv30: push_data not implemented\n"); +} + +void +nv30_transfer_copy_data(struct nouveau_context *nv, + struct nouveau_bo *dst, unsigned d_off, unsigned d_dom, + struct nouveau_bo *src, unsigned s_off, unsigned s_dom, + unsigned size) +{ + struct nv04_fifo *fifo = nv->screen->channel->data; + struct nouveau_pushbuf_refn refs[] = { + { src, s_dom | NOUVEAU_BO_RD }, + { dst, d_dom | NOUVEAU_BO_WR }, + }; + struct nouveau_pushbuf *push = nv->pushbuf; + unsigned pages, lines; + + pages = size >> 12; + size -= (pages << 12); + + BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2); + PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); + PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); + + while (pages) { + lines = (pages > 2047) ? 2047 : pages; + pages -= lines; + + if (nouveau_pushbuf_space(push, 13, 2, 0) || + nouveau_pushbuf_refn (push, refs, 2)) + return; + + BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); + PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0); + PUSH_DATA (push, 4096); + PUSH_DATA (push, 4096); + PUSH_DATA (push, 4096); + PUSH_DATA (push, lines); + PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | + NV03_M2MF_FORMAT_OUTPUT_INC_1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); + PUSH_DATA (push, 0x00000000); + + s_off += (lines << 12); + d_off += (lines << 12); + } + + if (size) { + if (nouveau_pushbuf_space(push, 13, 2, 0) || + nouveau_pushbuf_refn (push, refs, 2)) + return; + + BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); + PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0); + PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0); + PUSH_DATA (push, size); + PUSH_DATA (push, size); + PUSH_DATA (push, size); + PUSH_DATA (push, 1); + PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | + NV03_M2MF_FORMAT_OUTPUT_INC_1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); + PUSH_DATA (push, 0x00000000); + BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); + PUSH_DATA (push, 0x00000000); + } }