From: Christoph Bumiller Date: Sat, 31 Oct 2009 12:38:22 +0000 (+0100) Subject: nv50: use SIFC also for shader upload X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9831e1f76cd020e1cde2b13e03149415319a8135;p=mesa.git nv50: use SIFC also for shader upload Adds a more generic SIFC transfer function. --- diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 33667e8765c..890defb90cb 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -196,7 +196,8 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); extern void nv50_linkage_validate(struct nv50_context *nv50); -extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); +extern void nv50_program_destroy(struct nv50_context *nv50, + struct nv50_program *p); /* nv50_state_validate.c */ extern boolean nv50_state_validate(struct nv50_context *nv50); @@ -210,4 +211,12 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50, /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); +/* nv50_transfer.c */ +extern void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp); + #endif diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index c3edc02cb51..faf638949f4 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2980,11 +2980,8 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program_exec *e; - struct nouveau_stateobj *so; - const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; - unsigned start, count, *up, *ptr; + uint32_t *up, i; boolean upload = FALSE; if (!p->bo) { @@ -2999,32 +2996,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; - for (e = p->exec_head; e; e = e->next) { + up = MALLOC(p->exec_size * 4); + + for (i = 0, e = p->exec_head; e; e = e->next) { unsigned ei, ci, bs; - if (e->param.index < 0) - continue; + if (e->param.index >= 0 && e->param.mask) { + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); + ei = e->param.shift >> 5; + ci = e->param.index; + if (bs == 0) + ci += p->data[bs]->start; - if (e->param.mask == 0) { + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); + } else + if (e->param.index >= 0) { + /* zero mask means param is a jump/branch offset */ assert(!(e->param.index & 1)); /* seem to be 8 byte steps */ ei = (e->param.index >> 1) + 0 /* START_ID */; e->inst[0] &= 0xf0000fff; e->inst[0] |= ei << 12; - continue; } - bs = (e->inst[1] >> 22) & 0x07; - assert(bs < 2); - ei = e->param.shift >> 5; - ci = e->param.index; - if (bs == 0) - ci += p->data[bs]->start; - - e->inst[ei] &= ~e->param.mask; - e->inst[ei] |= (ci << e->param.shift); + up[i++] = e->inst[0]; + if (is_long(e)) + up[i++] = e->inst[1]; } + assert(i == p->exec_size); if (p->data[0]) p->data_start[0] = p->data[0]->start; @@ -3037,45 +3039,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } #endif - - up = ptr = MALLOC(p->exec_size * 4); - for (e = p->exec_head; e; e = e->next) { - *(ptr++) = e->inst[0]; - if (is_long(e)) - *(ptr++) = e->inst[1]; - } - - so = so_new(4,2); - so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); - - start = 0; count = p->exec_size; - while (count) { - struct nouveau_channel *chan = nv50->screen->base.channel; - unsigned nr; - - so_emit(chan, so); - - nr = MIN2(count, 2047); - nr = MIN2(chan->pushbuf->remaining, nr); - if (chan->pushbuf->remaining < (nr + 3)) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); - OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); - OUT_RINGp (chan, up + start, nr); - - start += nr; - count -= nr; - } + nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, + NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, + up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, + 0, 0, p->exec_size * 4, 1, 1); FREE(up); - so_ref(NULL, &so); } void diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 9c289026bbb..f1eb6723366 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -237,3 +237,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) pscreen->transfer_map = nv50_transfer_map; pscreen->transfer_unmap = nv50_transfer_unmap; } + +void +nv50_upload_sifc(struct nv50_context *nv50, + struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc, + unsigned dst_format, int dst_w, int dst_h, int dst_pitch, + void *src, unsigned src_format, int src_pitch, + int x, int y, int w, int h, int cpp) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned line_dwords = (w * cpp + 3) / 4; + + reloc |= NOUVEAU_BO_WR; + + WAIT_RING (chan, 32); + + if (bo->tile_flags) { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); + OUT_RING (chan, dst_format); + OUT_RING (chan, 0); + OUT_RING (chan, bo->tile_mode << 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } else { + BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2); + OUT_RING (chan, dst_format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1); + OUT_RING (chan, dst_pitch); + } + + BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4); + OUT_RING (chan, dst_w); + OUT_RING (chan, dst_h); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + + /* NV50_2D_OPERATION_SRCCOPY assumed already set */ + + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + OUT_RING (chan, 0); + OUT_RING (chan, src_format); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); + OUT_RING (chan, w); + OUT_RING (chan, h); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, x); + OUT_RING (chan, 0); + OUT_RING (chan, y); + + while (h--) { + const uint32_t *p = src; + unsigned count = line_dwords; + + while (count) { + unsigned nr = MIN2(count, 1792); + + if (chan->pushbuf->remaining <= nr) { + FIRE_RING (chan); + + BEGIN_RING(chan, eng2d, + NV50_2D_DST_ADDRESS_HIGH, 2); + OUT_RELOCh(chan, bo, dst_offset, reloc); + OUT_RELOCl(chan, bo, dst_offset, reloc); + } + assert(chan->pushbuf->remaining > nr); + + BEGIN_RING(chan, eng2d, + NV50_2D_SIFC_DATA | (2 << 29), nr); + OUT_RINGp (chan, p, nr); + + p += nr; + count -= nr; + } + + src += src_pitch; + } + + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); +}