#include "codegen/nv50_ir_driver.h"
-#ifdef DEBUG
+#ifndef NDEBUG
static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *);
#endif
obj_class = GM200_COMPUTE_CLASS;
break;
case 0x130:
- obj_class = dev->chipset == 0x130 ? GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;
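+ /* only the 0x130 and 0x13b chipsets get the GP100 class; the other
+ * GP10x parts use the GP104 class */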
+ obj_class = (dev->chipset == 0x130 || dev->chipset == 0x13b) ?
+ GP100_COMPUTE_CLASS : GP104_COMPUTE_CLASS;
break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
uint64_t address
= nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
- assert(i > 0); /* we really only want uniform buffer objects */
-
- BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
- PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
- BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
- PUSH_DATA (push, 4 * 4);
- PUSH_DATA (push, 0x1);
- BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
- PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
-
- PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
- PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
- PUSH_DATA (push, nvc0->constbuf[5][i].size);
- PUSH_DATA (push, 0);
- BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
+ /* constbufs above 0 are fetched via ubo info in the shader */
+ if (i > 0) {
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
+ PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1));
+ BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 4 * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATA (push, 0);
+ }
+ BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);
res->cb_bindings[s] |= 1 << i;
}
}
return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
}
+static void
+nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
+{
+ // only constant buffers 0-6 can be bound in the launch descriptor, the rest
+ // are loaded through global memory
+ for (int i = 0; i <= 6; i++) {
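+ // skip user (pointer) buffers and unbound slots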
+ if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf)
+ continue;
+
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[5][i].u.buf);
+
+ uint32_t base = res->offset + nvc0->constbuf[5][i].offset;
+ uint32_t size = nvc0->constbuf[5][i].size;
+ if (gp100)
+ gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
+ else
+ nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size);
+ }
+
+ // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because
+ // nve4_compute_upload_input() does it later
+}
+
static void
nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
struct nve4_cp_launch_desc *desc,
if (nvc0->constbuf[5][0].user || cp->parm_size) {
nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
+
+ // Later logic will attempt to bind a real buffer at position 0. That
+ // should not happen if we've bound a user buffer.
+ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
}
nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
NVC0_CB_AUX_INFO(5), 1 << 11);
+
+ nve4_compute_setup_buf_cb(nvc0, false, desc);
}
static void
if (nvc0->constbuf[5][0].user || cp->parm_size) {
gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
+
+ // Later logic will attempt to bind a real buffer at position 0. That
+ // should not happen if we've bound a user buffer.
+ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
}
gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
NVC0_CB_AUX_INFO(5), 1 << 11);
+
+ nve4_compute_setup_buf_cb(nvc0, true, desc);
}
static inline void *
nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
void *desc;
uint64_t desc_gpuaddr;
nve4_compute_upload_input(nvc0, info);
-#ifdef DEBUG
+#ifndef NDEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
gp100_compute_dump_launch_desc(desc);
}
/* upload descriptor and flush */
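+ /* reserve pushbuf space and keep the code segment resident for this launch */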
+ nouveau_pushbuf_space(push, 32, 1, 0);
+ PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);
BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1);
PUSH_DATA (push, desc_gpuaddr >> 8);
BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1);
BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
+ nvc0_update_compute_invocations_counter(nvc0, info);
+
out:
if (ret)
NOUVEAU_ERR("Failed to launch grid !\n");
}
-#ifdef DEBUG
+#ifndef NDEBUG
static const char *nve4_cache_split_name(unsigned value)
{
switch (value) {