nv50,nvc0: Copy shared memory per block to the program info structure and back
author: Pierre Moreau <pierre.morrow@free.fr>
Mon, 2 Oct 2017 18:57:10 +0000 (20:57 +0200)
committer: Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 4 Nov 2017 18:12:07 +0000 (14:12 -0400)
In OpenCL/CUDA kernels, shared memory usage can be defined within the
kernel code. Such usage is only picked up while parsing the SPIR-V,
during the translation phase of the program, so the shared memory size
is passed in through the program info structure and copied back from it.

Signed-off-by: Pierre Moreau <pierre.morrow@free.fr>
src/gallium/drivers/nouveau/nv50/nv50_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_program.c

index 92e73f8c12c6dd9a3dd97afe887097781f21dd16..6b472d7fdd08ef6e05d1a536c85020fd122a7ca5 100644 (file)
@@ -336,6 +336,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    info->bin.sourceRep = PIPE_SHADER_IR_TGSI;
    info->bin.source = (void *)prog->pipe.tokens;
 
+   info->bin.smemSize = prog->cp.smem_size;
    info->io.auxCBSlot = 15;
    info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
    info->io.genUserClip = prog->vp.clpd_nr;
@@ -382,6 +383,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    prog->interps = info->bin.fixupData;
    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
    prog->tls_space = info->bin.tlsSpace;
+   prog->cp.smem_size = info->bin.smemSize;
    prog->mul_zero_wins = info->io.mul_zero_wins;
    prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
 
index e43a8de9f5966523764b703c117c1345c05e1334..a6112f401effee3f1ec7197c0fbcd4cc11c9bc49 100644 (file)
@@ -579,6 +579,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    info->optLevel = 3;
 #endif
 
+   info->bin.smemSize = prog->cp.smem_size;
    info->io.genUserClip = prog->vp.num_ucps;
    info->io.auxCBSlot = 15;
    info->io.msInfoCBSlot = 15;
@@ -618,6 +619,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    prog->relocs = info->bin.relocData;
    prog->fixups = info->bin.fixupData;
    prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+   prog->cp.smem_size = info->bin.smemSize;
    prog->num_barriers = info->numBarriers;
 
    prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;