From efe532b73934299bb6eeeecf0aaab7145317fd51 Mon Sep 17 00:00:00 2001 From: Pierre Moreau Date: Mon, 2 Oct 2017 20:57:10 +0200 Subject: [PATCH] nv50,nvc0: Copy shared memory per block to the program info structure and back In OpenCL/CUDA kernels, shared memory usage can be defined within the kernel code. Those usage will only be picked up while parsing the SPIR-V, during the translation phase of the program. Signed-off-by: Pierre Moreau --- src/gallium/drivers/nouveau/nv50/nv50_program.c | 2 ++ src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 92e73f8c12c..6b472d7fdd0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -336,6 +336,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->bin.sourceRep = PIPE_SHADER_IR_TGSI; info->bin.source = (void *)prog->pipe.tokens; + info->bin.smemSize = prog->cp.smem_size; info->io.auxCBSlot = 15; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; @@ -382,6 +383,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, prog->interps = info->bin.fixupData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; + prog->cp.smem_size = info->bin.smemSize; prog->mul_zero_wins = info->io.mul_zero_wins; prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index e43a8de9f59..a6112f401ef 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -579,6 +579,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->optLevel = 3; #endif + info->bin.smemSize = prog->cp.smem_size; info->io.genUserClip = prog->vp.num_ucps; info->io.auxCBSlot = 15; info->io.msInfoCBSlot = 15; @@ -618,6 +619,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, prog->relocs = info->bin.relocData; prog->fixups = info->bin.fixupData; prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->cp.smem_size = info->bin.smemSize; prog->num_barriers = info->numBarriers; prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; -- 2.30.2