From: Samuel Pitoiset Date: Thu, 25 Jul 2013 08:35:35 +0000 (+0200) Subject: nvc0: implement compute support for nvc0 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9dcd7888e6338f08a6999abfbc2ca1008f741bf8;p=mesa.git nvc0: implement compute support for nvc0 Tested on nvc0, nvc1, nvcf and nvd9. --- diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources index db8d12347b0..815a27addd4 100644 --- a/src/gallium/drivers/nvc0/Makefile.sources +++ b/src/gallium/drivers/nvc0/Makefile.sources @@ -1,4 +1,5 @@ C_SOURCES := \ + nvc0_compute.c \ nvc0_context.c \ nvc0_formats.c \ nvc0_miptree.c \ diff --git a/src/gallium/drivers/nvc0/nvc0_compute.c b/src/gallium/drivers/nvc0/nvc0_compute.c new file mode 100644 index 00000000000..464b72f34d4 --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_compute.c @@ -0,0 +1,271 @@ +/* + * Copyright 2013 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller, Samuel Pitoiset
+ */
+
+#include "nvc0_context.h"
+#include "nvc0_compute.h"
+
+/*
+ * Allocate the compute object and the kernel parameter buffer of a screen,
+ * then emit the one-time compute state into the given push buffer.
+ *
+ * Returns 0 on success, -1 if the chipset family is neither 0xc0 nor 0xd0,
+ * or the error code of the nouveau allocation that failed.
+ */
+int
+nvc0_screen_compute_setup(struct nvc0_screen *screen,
+                          struct nouveau_pushbuf *push)
+{
+   struct nouveau_object *chan = screen->base.channel;
+   struct nouveau_device *dev = screen->base.device;
+   uint32_t obj_class;
+   int ret;
+   int i;
+
+   /* only chipset 0xc8 uses the NVC8 class, every other 0xc0/0xd0 chipset
+    * uses the NVC0 one */
+   switch (dev->chipset & 0xf0) {
+   case 0xc0:
+      if (dev->chipset == 0xc8)
+         obj_class = NVC8_COMPUTE_CLASS;
+      else
+         obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   case 0xd0:
+      obj_class = NVC0_COMPUTE_CLASS;
+      break;
+   default:
+      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+      return -1;
+   }
+
+   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
+                            &screen->compute);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+      return ret;
+   }
+
+   /* 4 KiB buffer in VRAM that receives the kernel parameters */
+   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+                        &screen->parm);
+   if (ret) {
+      NOUVEAU_ERR("Failed to allocate parameter buffer: %d\n", ret);
+      return ret;
+   }
+
+   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->compute->oclass);
+
+   /* hardware limit */
+   BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+   PUSH_DATA (push, screen->mp_count);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+   PUSH_DATA (push, 0xf);
+
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+   PUSH_DATA (push, 0x8000);
+
+   /* global memory setup */
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+   for (i = 0; i <= 0xff; i++) {
+      /* Entry i: INDEX = i, HIGH = i, readable and writable.  The access
+       * bits come from the unsigned flag constants instead of 0xc << 28,
+       * which shifts into the sign bit of a signed int.
+       */
+      PUSH_DATA (push, NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK |
+                       NVC0_COMPUTE_GLOBAL_BASE_READ_OK |
+                       (i << NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT) | i);
+   }
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+   PUSH_DATA (push, 1);
+
+   /* local memory and cstack setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->offset);
+   PUSH_DATA (push, screen->tls->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+   PUSH_DATAh(push, screen->tls->size);
+   PUSH_DATA (push, screen->tls->size);
+   BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
+   PUSH_DATA (push, 1 << 24);
+
+   /* shared memory setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
+   PUSH_DATA (push, 2 << 24);
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+   PUSH_DATA (push, 0);
+
+   /* code segment setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+   PUSH_DATAh(push, screen->text->offset);
+   PUSH_DATA (push, screen->text->offset);
+
+   /* bind parameters buffer */
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (push, screen->parm->size);
+   PUSH_DATAh(push, screen->parm->offset);
+   PUSH_DATA (push, screen->parm->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (push, (0 << 8) | 1);
+
+   /* TODO: textures & samplers */
+
+   return 0;
+}
+
+/*
+ * Make sure the bound compute program is translated and uploaded.
+ *
+ * Returns TRUE when prog->mem is already set or the upload succeeds, FALSE
+ * when translation fails, the program has no code, or the upload fails.
+ */
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0)
+{
+   struct nvc0_program *prog = nvc0->compprog;
+
+   /* NOTE(review): prog->mem presumably marks a program that has already
+    * been uploaded - confirm against nvc0_program_upload_code() */
+   if (prog->mem)
+      return TRUE;
+
+   if (!prog->translated) {
+      prog->translated = nvc0_program_translate(
+         prog, nvc0->screen->base.device->chipset);
+      if (!prog->translated)
+         return FALSE;
+   }
+   if (unlikely(!prog->code_size))
+      return FALSE;
+
+   if (!nvc0_program_upload_code(nvc0, prog))
+      return FALSE;
+
+   /* the upload succeeded, emit FLUSH with the CODE bit */
+   BEGIN_NVC0(nvc0->base.pushbuf, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE);
+   return TRUE;
+}
+
+/*
+ * Validate what a compute launch depends on: the program first, then the
+ * compute buffer context on the push buffer.
+ *
+ * Returns FALSE if the program or the push buffer fails to validate.
+ */
+static boolean
+nvc0_compute_state_validate(struct nvc0_context *nvc0)
+{
+   if (!nvc0_compute_validate_program(nvc0))
+      return FALSE;
+
+   /* TODO: textures, samplers, surfaces, global memory buffers */
+
+   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+   nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+   if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+      return FALSE;
+   if (unlikely(nvc0->state.flushed))
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+   return TRUE;
+}
+
+/*
+ * Write the kernel parameters into the parameter buffer, which is bound as
+ * constant buffer 0.  Does nothing when the program's parm_size is 0.
+ */
+static void
+nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nvc0_program *cp = nvc0->compprog;
+
+   if (!cp->parm_size)
+      return;
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+   PUSH_DATA (push, align(cp->parm_size, 0x100));
+   PUSH_DATAh(push, screen->parm->offset);
+   PUSH_DATA (push, screen->parm->offset);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+   PUSH_DATA (push, (0 << 8) | 1);
+   /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
+   BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+   PUSH_DATA (push, 0);
+   /* NOTE(review): parm_size / 4 drops a trailing partial word - this
+    * assumes parm_size is a multiple of 4, confirm with the compiler */
+   PUSH_DATAp(push, input, cp->parm_size / 4);
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+}
+
+/*
+ * Launch a compute grid with the currently bound compute program.
+ *
+ * block_layout  block dimensions, elements [0], [1] and [2] are read
+ * grid_layout   grid dimensions, elements [0], [1] and [2] are read
+ * label         symbol passed to nvc0_program_symbol_offset() to obtain
+ *               the value written to CP_START_ID
+ * input         kernel parameters, parm_size / 4 words of it are pushed
+ *
+ * If state validation fails an error is logged and nothing is launched.
+ */
+void
+nvc0_launch_grid(struct pipe_context *pipe,
+                 const uint *block_layout, const uint *grid_layout,
+                 uint32_t label,
+                 const void *input)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_program *cp = nvc0->compprog;
+   unsigned s, i;
+
+   if (!nvc0_compute_state_validate(nvc0)) {
+      NOUVEAU_ERR("Failed to launch grid !\n");
+      return;
+   }
+
+   nvc0_compute_upload_input(nvc0, input);
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
+   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+
+   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
+
+   /* shared memory size, threads per block, barriers */
+   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
+   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+   PUSH_DATA (push, cp->num_barriers);
+   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+   PUSH_DATA (push, cp->num_gprs);
+
+   /* grid/block setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
+   PUSH_DATA (push, grid_layout[2]);
+   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
+   PUSH_DATA (push, block_layout[2]);
+
+   /* launch preliminary setup */
+   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+   PUSH_DATA (push, 0x1);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
+
+   /* kernel launching */
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+   PUSH_DATA (push, 0x1000);
+   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+   PUSH_DATA (push, 0);
+   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+   PUSH_DATA (push, 0x1);
+
+   /* rebind all the 3D constant buffers
+    * (looks like binding a CB on COMPUTE clobbers 3D state) */
+   nvc0->dirty |= NVC0_NEW_CONSTBUF;
+   for (s = 0; s < 6; s++) {
+      for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
+         if (nvc0->constbuf[s][i].u.buf)
+            nvc0->constbuf_dirty[s] |= 1 << i;
+   }
+   memset(nvc0->state.uniform_buffer_bound, 0,
+          sizeof(nvc0->state.uniform_buffer_bound));
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.h b/src/gallium/drivers/nvc0/nvc0_compute.h
new file mode 100644
index 00000000000..f2df7bed310
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.h
@@ -0,0 +1,10 @@
+#ifndef NVC0_COMPUTE_H
+#define NVC0_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0_compute.xml.h"
+
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0);
+
+#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nvc0/nvc0_compute.xml.h
new file mode 100644
index 00000000000..35e6bfdbea2
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_compute.xml.h
@@ -0,0 +1,410 @@
+#ifndef NVC0_COMPUTE_XML
+#define NVC0_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13)
+- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22)
+- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13)
+- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13)
+- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22)
+- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. 
(koala_br) +- Carlos Martin (carlosmn) +- Christoph Bumiller (calim, chrisbmr) +- Dawid Gajownik (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov (lumag) +- EdB (edb_) +- Erik Waling (erikwaling) +- Francisco Jerez (curro) +- imirkin (imirkin) +- jb17bsome (jb17bsome) +- Jeremy Kolb (kjeremy) +- Laurent Carlier (lordheavy) +- Luca Barbieri (lb, lb1) +- Maarten Maathuis (stillunknown) +- Marcin Kościelnicki (mwk, koriakin) +- Mark Carey (careym) +- Matthieu Castet (mat-c) +- nvidiaman (nvidiaman) +- Patrice Mandin (pmandin, pmdata) +- Pekka Paalanen (pq, ppaalanen) +- Peter Popov (ironpeter) +- Richard Hughes (hughsient) +- Rudi Cilibrasi (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet (leroutier) +- Stephane Marchesin (marcheu) +- sturmflut (sturmflut) +- Sylvain Munaut +- Victor Stinner (haypo) +- Wladmir van der Laan (miathan6) +- Younes Manton (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + + +#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204 + +#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208 + +#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c + +#define NVC0_COMPUTE_TEX_LIMITS 0x00000210 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000 +#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007 + +#define NVC0_COMPUTE_SHARED_BASE 0x00000214 + +#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c +#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001 +#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002 +#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004 +#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010 +#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100 +#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000 + +#define NVC0_COMPUTE_BIND_TSC 0x00000228 +#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0 +#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12 + +#define NVC0_COMPUTE_BIND_TIC 0x0000022c +#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe +#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9 + +#define NVC0_COMPUTE_BIND_TSC2 0x00000230 +#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010 +#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4 +#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000 +#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12 + +#define 
NVC0_COMPUTE_BIND_TIC2 0x00000234 +#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002 +#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1 +#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00 +#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9 + +#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238 +#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c + +#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244 + +#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c + +#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250 + +#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254 + +#define NVC0_COMPUTE_UNK028C 0x0000028c + +#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c +#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK02A0 0x000002a0 + +#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0 + +#define NVC0_COMPUTE_UNK02C4 0x000002c4 + +#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8 +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff +#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000 +#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16 +#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000 +#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 0x80000000 + +#define NVC8_COMPUTE_UNK02E0 0x000002e0 + +#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308 +#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001 +#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003 + +#define NVC0_COMPUTE_UNK030C 0x0000030c + +#define NVC0_COMPUTE_UNK0360 0x00000360 +#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300 +#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8 +#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00 +#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10 + +#define NVC0_COMPUTE_LAUNCH 0x00000368 + 
+#define NVC0_COMPUTE_UNK036C 0x0000036c +#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003 +#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0 +#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c +#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2 + +#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac +#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff +#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000 +#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16 + +#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0 + +#define NVC0_COMPUTE_CP_START_ID 0x000003b4 + +#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0)) +#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004 +#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020 + +#define NVC0_COMPUTE_MP_LIMIT 0x00000758 + +#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c + +#define NVC0_COMPUTE_GRIDID 0x00000780 + +#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790 + +#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794 + +#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798 + +#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c + +#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0 + +#define NVC0_COMPUTE_COMPUTE_END 0x00000a04 +#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001 + +#define NVC0_COMPUTE_UNK0A08 0x00000a08 + +#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64 + +#define NVC0_COMPUTE_UNK0D94 0x00000d94 + +#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4 + +#define NVC0_COMPUTE_UNK10F4 0x000010f4 +#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001 +#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010 +#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100 + +#define NVC0_COMPUTE_LINKED_TSC 0x00001234 + +#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288 + +#define NVC0_COMPUTE_UNK12AC 0x000012ac + +#define NVC0_COMPUTE_TSC_FLUSH 0x00001330 +#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TIC_FLUSH 0x00001334 +#define 
NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007 +#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0 +#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4 + +#define NVC0_COMPUTE_UNK1354 0x00001354 + +#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424 + +#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550 + +#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554 + +#define NVC0_COMPUTE_COND_MODE 0x00001558 +#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000 +#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001 +#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002 +#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003 +#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004 + +#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c + +#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560 + +#define NVC0_COMPUTE_TSC_LIMIT 0x00001564 + +#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574 + +#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578 + +#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c + +#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608 + +#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c + +#define NVC0_COMPUTE_TEX_MISC 0x00001664 +#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001 +#define NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002 + +#define NVC0_COMPUTE_UNK1690 0x00001690 +#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001 +#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000 + +#define NVC0_COMPUTE_CB_BIND 0x00001694 +#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001 +#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00 +#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8 + +#define NVC0_COMPUTE_FLUSH 0x00001698 +#define NVC0_COMPUTE_FLUSH_CODE 0x00000001 +#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010 +#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100 +#define 
NVC0_COMPUTE_FLUSH_CB 0x00001000 + +#define NVC0_COMPUTE_UNK1930 0x00001930 + +#define NVC0_COMPUTE_UNK1944 0x00001944 + +#define NVC0_COMPUTE_DELAY 0x00001a24 + +#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0)) +#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004 +#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005 + +#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00 + +#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04 + +#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08 + +#define NVC0_COMPUTE_QUERY_GET 0x00001b0c +#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000 +#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003 +#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000 +#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000 + +#define NVC0_COMPUTE_CB_SIZE 0x00002380 + +#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384 + +#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388 + +#define NVC0_COMPUTE_CB_POS 0x0000238c + +#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0)) +#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004 +#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010 + +#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020 +#define NVC0_COMPUTE_IMAGE__LEN 0x00000008 + +#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0)) + +#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff +#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0 +#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000 +#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000 + +#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0)) +#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0 
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000 +#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12 + +#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0)) + +#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008 + +#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000 +#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28 + +#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0)) +#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004 +#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008 +#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001 +#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000 +#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001 +#define 
NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0 +#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4 + +#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc + + +#endif /* NVC0_COMPUTE_XML */ diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 8cd5fc5a646..69e1970b64e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -258,8 +258,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) pipe->draw_vbo = nvc0_draw_vbo; pipe->clear = nvc0_clear; - if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) - pipe->launch_grid = nve4_launch_grid; + pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ? + nve4_launch_grid : nvc0_launch_grid; pipe->flush = nvc0_flush; pipe->texture_barrier = nvc0_texture_barrier; diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 0431b89e151..9e589602964 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -358,4 +358,8 @@ void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); void nve4_launch_grid(struct pipe_context *, const uint *, const uint *, uint32_t, const void *); +/* nvc0_compute.c */ +void nvc0_launch_grid(struct pipe_context *, + const uint *, const uint *, uint32_t, const void *); + #endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 171a30256a4..bc5580bdadc 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -489,6 +489,11 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) switch (screen->base.device->chipset & 0xf0) { case 0xc0: case 0xd0: + /* Using COMPUTE has weird effects on 3D state, we need to + * investigate this further before enabling it by default. 
+ */ + if (debug_get_bool_option("NVC0_COMPUTE", FALSE)) + return nvc0_screen_compute_setup(screen, screen->base.pushbuf); return 0; case 0xe0: case 0xf0: diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index ed43696ee8b..54ff6221385 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -215,6 +215,7 @@ int nvc0_screen_tic_alloc(struct nvc0_screen *, void *); int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *); int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); +int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos, uint32_t lneg, uint32_t cstack); diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c index abadd601a68..8b39f7342b4 100644 --- a/src/gallium/drivers/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nvc0/nve4_compute.c @@ -23,6 +23,7 @@ */ #include "nvc0_context.h" +#include "nvc0_compute.h" #include "nve4_compute.h" #include "nv50/codegen/nv50_ir_driver.h" @@ -297,39 +298,10 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) } -static boolean -nve4_compute_validate_program(struct nvc0_context *nvc0) -{ - struct nvc0_program *prog = nvc0->compprog; - - if (prog->mem) - return TRUE; - - if (!prog->translated) { - prog->translated = nvc0_program_translate( - prog, nvc0->screen->base.device->chipset); - if (!prog->translated) - return FALSE; - } - if (unlikely(!prog->code_size)) - return FALSE; - - if (likely(prog->code_size)) { - if (nvc0_program_upload_code(nvc0, prog)) { - struct nouveau_pushbuf *push = nvc0->base.pushbuf; - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); - PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CODE); - return TRUE; - } - } - return FALSE; -} - - static boolean nve4_compute_state_validate(struct nvc0_context *nvc0) { - if (!nve4_compute_validate_program(nvc0)) + if 
(!nvc0_compute_validate_program(nvc0)) return FALSE; if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) nve4_compute_validate_textures(nvc0);