From 3ce80f924d07648040ab08a9cd30588621fb47a1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin
Date: Tue, 5 Apr 2016 16:00:56 +0200
Subject: [PATCH] nv50/ir: add support for SULDP -> SULDB conversion

This will allow to convert surface formats without adding an extra call
to our lib.

[hakzsam: make use of this for GK104]

Signed-off-by: Samuel Pitoiset
Signed-off-by: Ilia Mirkin
---
Review notes (not part of the commit message):
- handleSurfaceOpNVE4() no longer reads su->op after the SUREDB/SUREDP
  branch has called delete_Instruction() on su; the store source type
  fixup is chained with "else if" instead of a separate "if".
- Comments were added to the new tables and helpers, so the hunk headers
  and the diffstat differ from the original submission. The "index" lines
  still carry the blob ids of the original submission.

 .../drivers/nouveau/codegen/nv50_ir.cpp       |  52 ++++++
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |  75 ++++++++
 .../nouveau/codegen/nv50_ir_from_tgsi.cpp     |  61 +++++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 164 +++++++++++++-----
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h   |   1 +
 5 files changed, 309 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 75e5fd843c2..c95aacf48f7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -965,6 +965,58 @@ const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
    { "BUFFER", 1, 1, false, false, false },
 };
 
+// Layout of every image format: name, number of components, size of each
+// component in bits and numeric type. There is one entry per ImgFormat
+// enumerator and the entries are listed in the order of that enum.
+const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
+{
+   { "NONE", 0, { 0, 0, 0, 0 }, UINT },
+
+   { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT },
+   { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT },
+   { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT },
+   { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT },
+   { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT },
+   { "R32F", 1, { 32, 0, 0, 0 }, FLOAT },
+   { "R16F", 1, { 16, 0, 0, 0 }, FLOAT },
+
+   { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT },
+   { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT },
+   { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT },
+   { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT },
+   { "RG32UI", 2, { 32, 32, 0, 0 }, UINT },
+   { "RG16UI", 2, { 16, 16, 0, 0 }, UINT },
+   { "RG8UI", 2, { 8, 8, 0, 0 }, UINT },
+   { "R32UI", 1, { 32, 0, 0, 0 }, UINT },
+   { "R16UI", 1, { 16, 0, 0, 0 }, UINT },
+   { "R8UI", 1, { 8, 0, 0, 0 }, UINT },
+
+   { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT },
+   { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT },
+   { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT },
+   { "RG32I", 2, { 32, 32, 0, 0 }, SINT },
+   { "RG16I", 2, { 16, 16, 0, 0 }, SINT },
+   { "RG8I", 2, { 8, 8, 0, 0 }, SINT },
+   { "R32I", 1, { 32, 0, 0, 0 }, SINT },
+   { "R16I", 1, { 16, 0, 0, 0 }, SINT },
+   { "R8I", 1, { 8, 0, 0, 0 }, SINT },
+
+   { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM },
+   { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM },
+   { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM },
+   { "RG16", 2, { 16, 16, 0, 0 }, UNORM },
+   { "RG8", 2, { 8, 8, 0, 0 }, UNORM },
+   { "R16", 1, { 16, 0, 0, 0 }, UNORM },
+   { "R8", 1, { 8, 0, 0, 0 }, UNORM },
+
+   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
+   { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM },
+   { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM },
+   { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM },
+   { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM },
+   { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM },
+};
+
 void
 TexInstruction::setIndirectR(Value *v)
 {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index c52e8619463..94e54bbccc1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -364,6 +364,67 @@ enum TexTarget
    TEX_TARGET_COUNT
 };
 
+// Image formats described by TexInstruction::formatTable, in table order.
+enum ImgFormat
+{
+   FMT_NONE,
+
+   FMT_RGBA32F,
+   FMT_RGBA16F,
+   FMT_RG32F,
+   FMT_RG16F,
+   FMT_R11G11B10F,
+   FMT_R32F,
+   FMT_R16F,
+
+   FMT_RGBA32UI,
+   FMT_RGBA16UI,
+   FMT_RGB10A2UI,
+   FMT_RGBA8UI,
+   FMT_RG32UI,
+   FMT_RG16UI,
+   FMT_RG8UI,
+   FMT_R32UI,
+   FMT_R16UI,
+   FMT_R8UI,
+
+   FMT_RGBA32I,
+   FMT_RGBA16I,
+   FMT_RGBA8I,
+   FMT_RG32I,
+   FMT_RG16I,
+   FMT_RG8I,
+   FMT_R32I,
+   FMT_R16I,
+   FMT_R8I,
+
+   FMT_RGBA16,
+   FMT_RGB10A2,
+   FMT_RGBA8,
+   FMT_RG16,
+   FMT_RG8,
+   FMT_R16,
+   FMT_R8,
+
+   FMT_RGBA16_SNORM,
+   FMT_RGBA8_SNORM,
+   FMT_RG16_SNORM,
+   FMT_RG8_SNORM,
+   FMT_R16_SNORM,
+   FMT_R8_SNORM,
+
+   IMG_FORMAT_COUNT,
+};
+
+// Numeric type of the components of an image format.
+enum ImgType {
+   UINT,
+   SINT,
+   UNORM,
+   SNORM,
+   FLOAT,
+};
+
 enum SVSemantic
 {
    SV_POSITION, // WPOS
@@ -900,6 +961,19 @@ public:
       enum TexTarget target;
    };
 
+public:
+   // Describes one image format, see formatTable.
+   struct ImgFormatDesc
+   {
+      char name[19];
+      uint8_t components; // number of components of the format
+      uint8_t bits[4]; // size of each component in bits, 0 when absent
+      ImgType type;
+   };
+
+   // One entry per ImgFormat enumerator.
+   static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
+
 public:
    TexInstruction(Function *, operation);
    virtual ~TexInstruction();
@@ -939,6 +1013,7 @@ public:
       int8_t offset[3]; // only used on nv50
 
       enum TexQuery query;
+      const struct ImgFormatDesc *format; // read by convertSurfaceFormat()
    } tex;
 
    ValueRef dPdx[3];
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index fd88af35b77..ce678983841 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -39,6 +39,7 @@ static nv50_ir::DataFile translateFile(uint file);
 static nv50_ir::TexTarget translateTexture(uint texTarg);
 static nv50_ir::SVSemantic translateSysVal(uint sysval);
 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
+static nv50_ir::ImgFormat translateImgFormat(uint format);
 
 class Instruction
 {
@@ -454,6 +455,66 @@ static nv50_ir::CacheMode translateCacheMode(uint qualifier)
    return nv50_ir::CACHE_CA;
 }
 
+// Translates a PIPE_FORMAT_* value into the matching nv50_ir::ImgFormat.
+// Any format without a case below trips the assert and yields FMT_NONE.
+static nv50_ir::ImgFormat translateImgFormat(uint format)
+{
+
+#define FMT_CASE(a, b) \
+  case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
+
+   switch (format) {
+   FMT_CASE(NONE, NONE);
+
+   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
+   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
+   FMT_CASE(R32G32_FLOAT, RG32F);
+   FMT_CASE(R16G16_FLOAT, RG16F);
+   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
+   FMT_CASE(R32_FLOAT, R32F);
+   FMT_CASE(R16_FLOAT, R16F);
+
+   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
+   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
+   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
+   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
+   FMT_CASE(R32G32_UINT, RG32UI);
+   FMT_CASE(R16G16_UINT, RG16UI);
+   FMT_CASE(R8G8_UINT, RG8UI);
+   FMT_CASE(R32_UINT, R32UI);
+   FMT_CASE(R16_UINT, R16UI);
+   FMT_CASE(R8_UINT, R8UI);
+
+   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
+   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
+   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
+   FMT_CASE(R32G32_SINT, RG32I);
+   FMT_CASE(R16G16_SINT, RG16I);
+   FMT_CASE(R8G8_SINT, RG8I);
+   FMT_CASE(R32_SINT, R32I);
+   FMT_CASE(R16_SINT, R16I);
+   FMT_CASE(R8_SINT, R8I);
+
+   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
+   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
+   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
+   FMT_CASE(R16G16_UNORM, RG16);
+   FMT_CASE(R8G8_UNORM, RG8);
+   FMT_CASE(R16_UNORM, R16);
+   FMT_CASE(R8_UNORM, R8);
+
+   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
+   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
+   FMT_CASE(R16G16_SNORM, RG16_SNORM);
+   FMT_CASE(R8G8_SNORM, RG8_SNORM);
+   FMT_CASE(R16_SNORM, R16_SNORM);
+   FMT_CASE(R8_SNORM, R8_SNORM);
+   }
+
+   assert(!"Unexpected format");
+   return nv50_ir::FMT_NONE;
+}
+
 nv50_ir::DataType Instruction::inferSrcType() const
 {
    switch (getOpcode()) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 03159e890fd..82f5f211d0b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1765,54 +1765,128 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
    su->setSrc(2, pred);
 }
 
+// Returns the data type to convert from when unpacking component c of
+// format t, chosen from the format's numeric type and the component width.
+static DataType
+getSrcType(const TexInstruction::ImgFormatDesc *t, int c)
+{
+   switch (t->type) {
+   case FLOAT: return t->bits[c] == 16 ? TYPE_F16 : TYPE_F32;
+   case UNORM: return t->bits[c] == 8 ? TYPE_U8 : TYPE_U16;
+   case SNORM: return t->bits[c] == 8 ? TYPE_S8 : TYPE_S16;
+   case UINT:
+      return (t->bits[c] == 8 ? TYPE_U8 :
+              (t->bits[c] == 16 ? TYPE_U16 : TYPE_U32));
+   case SINT:
+      return (t->bits[c] == 8 ? TYPE_S8 :
+              (t->bits[c] == 16 ? TYPE_S16 : TYPE_S32));
+   }
+   return TYPE_NONE;
+}
+
+// Returns the data type a component is unpacked to: F32 for float and
+// normalized formats, U32 / S32 for unsigned / signed integer formats.
+static DataType
+getDestType(const ImgType type) {
+   switch (type) {
+   case FLOAT:
+   case UNORM:
+   case SNORM:
+      return TYPE_F32;
+   case UINT:
+      return TYPE_U32;
+   case SINT:
+      return TYPE_S32;
+   default:
+      assert(!"Impossible type");
+      return TYPE_NONE;
+   }
+}
+
+// Turns a formatted surface load into a raw load (OP_SULDB) sized after the
+// sum of the component widths of su->tex.format, then emits the code that
+// unpacks and converts each component into the original defs of su.
 void
-NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
+NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
 {
-   processSurfaceCoordsNVE4(su);
+   const TexInstruction::ImgFormatDesc *format = su->tex.format;
+   int width = format->bits[0] + format->bits[1] +
+      format->bits[2] + format->bits[3];
+   Value *untypedDst[4] = {};
+   Value *typedDst[4] = {};
+
+   // We must convert this to a generic load.
+   su->op = OP_SULDB;
+
+   su->dType = typeOfSize(width / 8);
+   su->sType = TYPE_U8;
+
+   for (int i = 0; i < width / 32; i++)
+      untypedDst[i] = bld.getSSA();
+   if (width < 32)
+      untypedDst[0] = bld.getSSA();
+
+   for (int i = 0; i < 4; i++) {
+      typedDst[i] = su->getDef(i);
+   }
+
+   // Set the untyped dsts as the su's destinations
+   for (int i = 0; i < 4; i++)
+      su->setDef(i, untypedDst[i]);
+
+   bld.setPosition(su, true);
+
+   // Unpack each component into the typed dsts
+   int bits = 0;
+   for (int i = 0; i < 4; bits += format->bits[i], i++) {
+      if (!typedDst[i])
+         continue;
+      if (i >= format->components) {
+         if (format->type == FLOAT ||
+             format->type == UNORM ||
+             format->type == SNORM)
+            bld.loadImm(typedDst[i], i == 3 ? 1.0f : 0.0f);
+         else
+            bld.loadImm(typedDst[i], i == 3 ? 1 : 0);
+         continue;
+      }
 
-   // Who do we hate more ? The person who decided that nvc0's SULD doesn't
-   // have to support conversion or the person who decided that, in OpenCL,
-   // you don't have to specify the format here like you do in OpenGL ?
-
-   if (su->op == OP_SULDP) {
-      // We don't patch shaders. Ever.
-      // You get an indirect call to our library blob here.
-      // But at least it's uniform.
-      FlowInstruction *call;
-      LValue *p[3];
-      LValue *r[5];
-      uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL;
-
-      for (int i = 0; i < 4; ++i)
-         (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i;
-      for (int i = 0; i < 3; ++i)
-         (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i;
-      (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4;
-
-      bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8);
-      bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 1 : 0), TYPE_U8);
-      bld.mkMov(p[0], su->getSrc(2), TYPE_U8);
-      bld.mkMov(r[4], su->getSrc(0), TYPE_U64);
-      bld.mkMov(r[2], su->getSrc(1), TYPE_U32);
-
-      call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate());
-
-      call->indirect = 1;
-      call->absolute = 1;
-      call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
-                                   prog->driver->io.auxCBSlot, TYPE_U32,
-                                   prog->driver->io.suInfoBase + base));
-      call->setSrc(1, r[2]);
-      call->setSrc(2, r[4]);
-      for (int i = 0; i < 3; ++i)
-         call->setSrc(3 + i, p[i]);
-      for (int i = 0; i < 4; ++i) {
-         call->setDef(i, r[i]);
-         bld.mkMov(su->getDef(i), r[i]);
+      // Get just that component's data into the relevant place
+      if (format->bits[i] == 32)
+         bld.mkMov(typedDst[i], untypedDst[i]);
+      else if (format->bits[i] == 16)
+         bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i],
+                   getSrcType(format, i), untypedDst[i / 2])
+         ->subOp = (i & 1) << (format->type == FLOAT ? 0 : 1);
+      else if (format->bits[i] == 8)
+         bld.mkCvt(OP_CVT, getDestType(format->type), typedDst[i],
+                   getSrcType(format, i), untypedDst[0])->subOp = i;
+      else {
+         bld.mkOp2(OP_EXTBF, TYPE_U32, typedDst[i], untypedDst[bits / 32],
+                   bld.mkImm((bits % 32) | (format->bits[i] << 8)));
+         if (format->type == UNORM || format->type == SNORM)
+            bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], getSrcType(format, i), typedDst[i]);
+      }
+
+      // Normalize / convert as necessary
+      if (format->type == UNORM)
+         bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << format->bits[i]) - 1)));
+      else if (format->type == SNORM)
+         bld.mkOp2(OP_MUL, TYPE_F32, typedDst[i], typedDst[i], bld.loadImm(NULL, 1.0f / ((1 << (format->bits[i] - 1)) - 1)));
+      else if (format->type == FLOAT && format->bits[i] < 16) {
+         bld.mkOp2(OP_SHL, TYPE_U32, typedDst[i], typedDst[i], bld.loadImm(NULL, 15 - format->bits[i]));
+         bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16, typedDst[i]);
       }
-      call->setDef(4, p[1]);
-      delete_Instruction(bld.getProgram(), su);
    }
+}
+
+void
+NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
+{
+   processSurfaceCoordsNVE4(su);
+
+   if (su->op == OP_SULDP)
+      convertSurfaceFormat(su);
 
    if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
       // FIXME: for out of bounds access, destination value will be undefined !
@@ -1840,9 +1914,11 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
       red->setPredicate(cc, pred);
       delete_Instruction(bld.getProgram(), su);
       handleCasExch(red, true);
-   } else {
+   } else if (su->op == OP_SUSTB || su->op == OP_SUSTP) {
+      // Keep this on the else path: su has been deleted by the branch above
+      // and must not be dereferenced once the reduction has been lowered.
       su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
    }
 }
 
 bool
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index cbd26af362f..17883a9b8f6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -137,6 +137,7 @@ private:
 
    void adjustCoordinatesMS(TexInstruction *);
    void processSurfaceCoordsNVE4(TexInstruction *);
+   void convertSurfaceFormat(TexInstruction *);
 
 protected:
    BuildUtil bld;
-- 
2.30.2