From 61d52a5fb9379eede3bf68b011f9477176341ee9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 17 Mar 2016 15:50:00 +0100 Subject: [PATCH] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for OpenCL global buffers. This commit changes the buffer code to use FILE_MEMORY_BUFFER at the ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL for use with OpenCL global buffers. Note that after lowering, buffer accesses use the FILE_MEMORY_GLOBAL register file. Tested with piglit on a gf119 and a gk107: ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 / ./piglit run -o shader -t '.*arb_compute_shader.*' results/shader [20/20] skip: 4, pass: 16 | Signed-off-by: Hans de Goede Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++--- src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- .../drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 7b0eb2f95b8..5141fc62e54 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -332,6 +332,7 @@ enum DataFile FILE_MEMORY_CONST, FILE_SHADER_INPUT, FILE_SHADER_OUTPUT, + FILE_MEMORY_BUFFER, FILE_MEMORY_GLOBAL, FILE_MEMORY_SHARED, FILE_MEMORY_LOCAL, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 4f012cd3b91..0fa6cc4278e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index ce83618d681..a429ca4ebe4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1296,13 +1296,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) handleSharedATOMNVE4(atom); return true; default: - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadBufInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); assert(base->reg.size == 8); atom->setIndirect(0, 0, base); + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; return true; } base = @@ -1889,7 +1890,7 @@ NVC0LoweringPass::handleLDST(Instruction *i) } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); i->op = OP_VFETCH; - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { Value *ind = i->getIndirect(0, 1); Value *ptr = loadBufInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); // XXX come up with a way not to do this for EVERY little access but @@ -1904,6 +1905,7 @@ NVC0LoweringPass::handleLDST(Instruction *i) } i->setIndirect(0, 1, NULL); i->setIndirect(0, 0, ptr); + 
i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); i->setPredicate(CC_NOT_P, pred); if (i->defExists(0)) { @@ -2241,7 +2243,7 @@ NVC0LoweringPass::visit(Instruction *i) break; case OP_ATOM: { - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; handleATOM(i); handleCasExch(i, cctl); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 066faa367d2..39121a3a542 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -460,6 +460,7 @@ int Symbol::print(char *buf, size_t size, case FILE_MEMORY_CONST: c = 'c'; break; case FILE_SHADER_INPUT: c = 'a'; break; case FILE_SHADER_OUTPUT: c = 'o'; break; + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering case FILE_MEMORY_GLOBAL: c = 'g'; break; case FILE_MEMORY_SHARED: c = 's'; break; case FILE_MEMORY_LOCAL: c = 'l'; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 2c4d7f53d60..2af1715d1d1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const case FILE_MEMORY_CONST: return 65536; case FILE_SHADER_INPUT: return 0x200; case FILE_SHADER_OUTPUT: return 0x200; + case FILE_MEMORY_BUFFER: return 0xffffffff; case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const if (ty == TYPE_B96 || ty == TYPE_NONE) return false; if (typeSizeof(ty) > 4) - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); + return (file == 
FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || + (file == FILE_MEMORY_BUFFER); return true; } @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const switch (i->src(0).getFile()) { case FILE_MEMORY_LOCAL: case FILE_MEMORY_GLOBAL: + case FILE_MEMORY_BUFFER: return 100; // really 400 to 800 default: return 22; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index a03afa8dc8d..9e1e7bf5688 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const case FILE_MEMORY_CONST: return 65536; case FILE_SHADER_INPUT: return 0x400; case FILE_SHADER_OUTPUT: return 0x400; + case FILE_MEMORY_BUFFER: return 0xffffffff; case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; -- 2.30.2