From f20a210dc8552daadfe403d899a9ba6cdd21560b Mon Sep 17 00:00:00 2001 From: Mark Menzynski Date: Mon, 17 Feb 2020 18:02:45 +0100 Subject: [PATCH] nv50/ir: Add nv50_ir_prog_info_out serialize and deserialize Adds functions for serializing and deserializing nv50_ir_prog_info_out structure, which are needed for shader caching. Signed-off-by: Mark Menzynski Reviewed-by: Karol Herbst Part-of: --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 49 ++++ .../nouveau/codegen/nv50_ir_emit_gk110.cpp | 14 +- .../nouveau/codegen/nv50_ir_emit_gm107.cpp | 14 +- .../nouveau/codegen/nv50_ir_emit_gv100.cpp | 14 +- .../nouveau/codegen/nv50_ir_emit_nv50.cpp | 6 +- .../nouveau/codegen/nv50_ir_emit_nvc0.cpp | 14 +- .../nouveau/codegen/nv50_ir_serialize.cpp | 230 ++++++++++++++++++ src/gallium/drivers/nouveau/meson.build | 1 + 8 files changed, 311 insertions(+), 31 deletions(-) create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 0e61e031b8e..a6f089ad9ac 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -25,6 +25,7 @@ #include "pipe/p_shader_tokens.h" +#include "util/blob.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" @@ -229,6 +230,54 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code, extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); + +#ifdef __cplusplus +namespace nv50_ir +{ + class FixupEntry; + class FixupData; + + void + gk110_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nv50_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + 
nvc0_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gv100_interpApply(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gk110_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gm107_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + nvc0_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); + void + gv100_selpFlip(const nv50_ir::FixupEntry *entry, uint32_t *code, + const nv50_ir::FixupData& data); +} +#endif + +/* Serialize a nv50_ir_prog_info_out structure and save it into blob */ +extern bool MUST_CHECK +nv50_ir_prog_info_out_serialize(struct blob *, struct nv50_ir_prog_info_out *); + +/* Deserialize from data and save into a nv50_ir_prog_info_out structure + * using a pointer. Size is the total size of the serialized data. + * Offset is the byte position in data where the serialized info_out starts.
*/ +extern bool MUST_CHECK +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 2118c3153f7..e651d7fdcb0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1209,8 +1209,8 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i) } } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1227,7 +1227,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) code[1] |= 1 << 13; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gk110_selpFlip); } } @@ -2042,8 +2042,8 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2078,10 +2078,10 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply); } else { code[0] |= 0xff << 23; - addInterp(i->ipa, 0xff, interpApply); + addInterp(i->ipa, 0xff, gk110_interpApply); } srcId(i->src(0).getIndirect(0), 10); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index dd8e1ab86c4..7d14466b9f8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -949,8 +949,8 @@ CodeEmitterGM107::emitI2I() emitGPR (0x00, insn->def(0)); } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -986,7 +986,7 @@ CodeEmitterGM107::emitSEL() emitGPR (0x00, insn->def(0)); if (insn->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, gm107_selpFlip); } } @@ -2556,8 +2556,8 @@ CodeEmitterGM107::emitAL2P() emitGPR (0x00, insn->def(0)); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -2617,12 +2617,12 @@ CodeEmitterGM107::emitIPA() emitGPR(0x14, insn->src(1)); if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(2)); - addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); + addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply); } else { if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) emitGPR(0x27, insn->src(1)); emitGPR(0x14); - addInterp(insn->ipa, 0xff, interpApply); + addInterp(insn->ipa, 0xff, gm107_interpApply); } if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp index ef33743e610..8b0489ca2fa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp @@ -350,8 +350,8 @@ CodeEmitterGV100::emitS2R() emitGPR (16, insn->def(0)); } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gv100_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if 
(data.force_persample_interp) @@ -367,7 +367,7 @@ CodeEmitterGV100::emitSEL() emitNOT (90, insn->src(2)); emitPRED (87, insn->src(2)); if (insn->subOp == 1) - addInterp(0, 0, selpFlip); + addInterp(0, 0, gv100_selpFlip); } void @@ -910,8 +910,8 @@ CodeEmitterGV100::emitATOMS() emitGPR (16, insn->def(0)); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +gv100_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int loc = entry->loc; @@ -971,10 +971,10 @@ CodeEmitterGV100::emitIPA() if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) { emitGPR (32); - addInterp(insn->ipa, 0xff, interpApply); + addInterp(insn->ipa, 0xff, gv100_interpApply); } else { emitGPR (32, insn->src(1)); - addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); + addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gv100_interpApply); } assert(!insn->src(0).isIndirect(0)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 03fada1415e..60e800ad502 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -881,8 +881,8 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i) emitFlagsRd(i); } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +nv50_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int encSize = entry->reg; @@ -934,7 +934,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i) emitFlagsRd(i); } - addInterp(i->ipa, i->encSize, interpApply); + addInterp(i->ipa, i->encSize, nv50_interpApply); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index b6e35dd0ee4..0a82c6de20d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1255,8 +1255,8 @@ CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) code[0] |= 1 << 5; } -static void -selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +nvc0_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int loc = entry->loc; if (data.force_persample_interp) @@ -1273,7 +1273,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i) code[1] |= 1 << 20; if (i->subOp == 1) { - addInterp(0, 0, selpFlip); + addInterp(0, 0, nvc0_selpFlip); } } @@ -1726,8 +1726,8 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i) } } -static void -interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) +void +nvc0_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) { int ipa = entry->ipa; int reg = entry->reg; @@ -1762,10 +1762,10 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i) if (i->op == OP_PINTERP) { srcId(i->src(1), 26); - addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + addInterp(i->ipa, SDATA(i->src(1)).id, nvc0_interpApply); } else { code[0] |= 0x3f << 26; - addInterp(i->ipa, 0x3f, interpApply); + addInterp(i->ipa, 0x3f, nvc0_interpApply); } srcId(i->src(0).getIndirect(0), 20); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp new file mode 100644 index 00000000000..41f151ac283 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_serialize.cpp @@ -0,0 +1,230 @@ +#include "util/blob.h" +#include "codegen/nv50_ir_driver.h" +#include "codegen/nv50_ir.h" +#include "codegen/nv50_ir_target.h" +#include "nv50_ir_driver.h" +#include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir_serialize.h" + +enum FixupApplyFunc { + APPLY_NV50, + APPLY_NVC0, + APPLY_GK110, + APPLY_GM107, + APPLY_GV100, + FLIP_NVC0, + FLIP_GK110, + FLIP_GM107, + FLIP_GV100, +}; + +extern bool +nv50_ir_prog_info_out_serialize(struct blob *blob, 
+ struct nv50_ir_prog_info_out *info_out) +{ + blob_write_uint16(blob, info_out->target); + blob_write_uint8(blob, info_out->type); + blob_write_uint8(blob, info_out->numPatchConstants); + + blob_write_uint16(blob, info_out->bin.maxGPR); + blob_write_uint32(blob, info_out->bin.tlsSpace); + blob_write_uint32(blob, info_out->bin.smemSize); + blob_write_uint32(blob, info_out->bin.codeSize); + blob_write_bytes(blob, info_out->bin.code, info_out->bin.codeSize); + blob_write_uint32(blob, info_out->bin.instructions); + + if (!info_out->bin.relocData) { + blob_write_uint32(blob, 0); // reloc count 0 + } else { + nv50_ir::RelocInfo *reloc = (nv50_ir::RelocInfo *)info_out->bin.relocData; + blob_write_uint32(blob, reloc->count); + blob_write_uint32(blob, reloc->codePos); + blob_write_uint32(blob, reloc->libPos); + blob_write_uint32(blob, reloc->dataPos); + blob_write_bytes(blob, reloc->entry, sizeof(*reloc->entry) * reloc->count); + } + + if (!info_out->bin.fixupData) { + blob_write_uint32(blob, 0); // fixup count 0 + } else { + nv50_ir::FixupInfo *fixup = (nv50_ir::FixupInfo *)info_out->bin.fixupData; + blob_write_uint32(blob, fixup->count); + + /* Going through each entry */ + for (uint32_t i = 0; i < fixup->count; i++) { + blob_write_uint32(blob, fixup->entry[i].val); + assert(fixup->entry[i].apply); + /* Compare function pointers and store an enum tag, so that + * deserializing knows which function pointer to assign back */ + if (fixup->entry[i].apply == nv50_ir::nv50_interpApply) + blob_write_uint8(blob, APPLY_NV50); + else if (fixup->entry[i].apply == nv50_ir::nvc0_interpApply) + blob_write_uint8(blob, APPLY_NVC0); + else if (fixup->entry[i].apply == nv50_ir::gk110_interpApply) + blob_write_uint8(blob, APPLY_GK110); + else if (fixup->entry[i].apply == nv50_ir::gm107_interpApply) + blob_write_uint8(blob, APPLY_GM107); + else if (fixup->entry[i].apply == nv50_ir::gv100_interpApply) + blob_write_uint8(blob, APPLY_GV100); + else if (fixup->entry[i].apply == nv50_ir::nvc0_selpFlip) + blob_write_uint8(blob,
FLIP_NVC0); + else if (fixup->entry[i].apply == nv50_ir::gk110_selpFlip) + blob_write_uint8(blob, FLIP_GK110); + else if (fixup->entry[i].apply == nv50_ir::gm107_selpFlip) + blob_write_uint8(blob, FLIP_GM107); + else if (fixup->entry[i].apply == nv50_ir::gv100_selpFlip) + blob_write_uint8(blob, FLIP_GV100); + else { + ERROR("unhandled fixup apply function pointer\n"); + assert(false); + return false; + } + } + } + + blob_write_uint8(blob, info_out->numInputs); + blob_write_uint8(blob, info_out->numOutputs); + blob_write_uint8(blob, info_out->numSysVals); + blob_write_bytes(blob, info_out->sv, info_out->numSysVals * sizeof(info_out->sv[0])); + blob_write_bytes(blob, info_out->in, info_out->numInputs * sizeof(info_out->in[0])); + blob_write_bytes(blob, info_out->out, info_out->numOutputs * sizeof(info_out->out[0])); + + switch(info_out->type) { + case PIPE_SHADER_VERTEX: + blob_write_bytes(blob, &info_out->prop.vp, sizeof(info_out->prop.vp)); + break; + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + blob_write_bytes(blob, &info_out->prop.tp, sizeof(info_out->prop.tp)); + break; + case PIPE_SHADER_GEOMETRY: + blob_write_bytes(blob, &info_out->prop.gp, sizeof(info_out->prop.gp)); + break; + case PIPE_SHADER_FRAGMENT: + blob_write_bytes(blob, &info_out->prop.fp, sizeof(info_out->prop.fp)); + break; + default: + break; + } + blob_write_bytes(blob, &info_out->io, sizeof(info_out->io)); + blob_write_uint8(blob, info_out->numBarriers); + + return true; +} + +extern bool +nv50_ir_prog_info_out_deserialize(void *data, size_t size, size_t offset, + struct nv50_ir_prog_info_out *info_out) +{ + struct blob_reader reader; + blob_reader_init(&reader, data, size); + blob_skip_bytes(&reader, offset); + + info_out->target = blob_read_uint16(&reader); + info_out->type = blob_read_uint8(&reader); + info_out->numPatchConstants = blob_read_uint8(&reader); + + info_out->bin.maxGPR = blob_read_uint16(&reader); + info_out->bin.tlsSpace = blob_read_uint32(&reader); + 
info_out->bin.smemSize = blob_read_uint32(&reader); + info_out->bin.codeSize = blob_read_uint32(&reader); + info_out->bin.code = (uint32_t *)MALLOC(info_out->bin.codeSize); + blob_copy_bytes(&reader, info_out->bin.code, info_out->bin.codeSize); + info_out->bin.instructions = blob_read_uint32(&reader); + + info_out->bin.relocData = NULL; + /* Check if data contains RelocInfo */ + uint32_t count = blob_read_uint32(&reader); + if (count) { + nv50_ir::RelocInfo *reloc = + CALLOC_VARIANT_LENGTH_STRUCT(nv50_ir::RelocInfo, + count * sizeof(*reloc->entry)); + reloc->codePos = blob_read_uint32(&reader); + reloc->libPos = blob_read_uint32(&reader); + reloc->dataPos = blob_read_uint32(&reader); + reloc->count = count; + + blob_copy_bytes(&reader, reloc->entry, sizeof(*reloc->entry) * reloc->count); + info_out->bin.relocData = reloc; + } + + info_out->bin.fixupData = NULL; + /* Check if data contains FixupInfo */ + count = blob_read_uint32(&reader); + if (count) { + nv50_ir::FixupInfo *fixup = + CALLOC_VARIANT_LENGTH_STRUCT(nv50_ir::FixupInfo, + count * sizeof(*fixup->entry)); + fixup->count = count; + + for (uint32_t i = 0; i < count; i++) { + fixup->entry[i].val = blob_read_uint32(&reader); + + /* Assign back function pointer depending on stored enum */ + enum FixupApplyFunc apply = (enum FixupApplyFunc)blob_read_uint8(&reader); + switch(apply) { + case APPLY_NV50: + fixup->entry[i].apply = nv50_ir::nv50_interpApply; + break; + case APPLY_NVC0: + fixup->entry[i].apply = nv50_ir::nvc0_interpApply; + break; + case APPLY_GK110: + fixup->entry[i].apply = nv50_ir::gk110_interpApply; + break; + case APPLY_GM107: + fixup->entry[i].apply = nv50_ir::gm107_interpApply; + break; + case APPLY_GV100: + fixup->entry[i].apply = nv50_ir::gv100_interpApply; + break; + case FLIP_NVC0: + fixup->entry[i].apply = nv50_ir::nvc0_selpFlip; + break; + case FLIP_GK110: + fixup->entry[i].apply = nv50_ir::gk110_selpFlip; + break; + case FLIP_GM107: + fixup->entry[i].apply = nv50_ir::gm107_selpFlip; + 
break; + case FLIP_GV100: + fixup->entry[i].apply = nv50_ir::gv100_selpFlip; + break; + default: + ERROR("unhandled fixup apply function switch case"); + assert(false); + return false; + } + } + info_out->bin.fixupData = fixup; + } + + info_out->numInputs = blob_read_uint8(&reader); + info_out->numOutputs = blob_read_uint8(&reader); + info_out->numSysVals = blob_read_uint8(&reader); + blob_copy_bytes(&reader, info_out->sv, info_out->numSysVals * sizeof(info_out->sv[0])); + blob_copy_bytes(&reader, info_out->in, info_out->numInputs * sizeof(info_out->in[0])); + blob_copy_bytes(&reader, info_out->out, info_out->numOutputs * sizeof(info_out->out[0])); + + switch(info_out->type) { + case PIPE_SHADER_VERTEX: + blob_copy_bytes(&reader, &info_out->prop.vp, sizeof(info_out->prop.vp)); + break; + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + blob_copy_bytes(&reader, &info_out->prop.tp, sizeof(info_out->prop.tp)); + break; + case PIPE_SHADER_GEOMETRY: + blob_copy_bytes(&reader, &info_out->prop.gp, sizeof(info_out->prop.gp)); + break; + case PIPE_SHADER_FRAGMENT: + blob_copy_bytes(&reader, &info_out->prop.fp, sizeof(info_out->prop.fp)); + break; + default: + break; + } + blob_copy_bytes(&reader, &(info_out->io), sizeof(info_out->io)); + info_out->numBarriers = blob_read_uint8(&reader); + + return true; +} diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build index 5a2d28a8394..a7197ddff79 100644 --- a/src/gallium/drivers/nouveau/meson.build +++ b/src/gallium/drivers/nouveau/meson.build @@ -142,6 +142,7 @@ files_libnouveau = files( 'codegen/nv50_ir_peephole.cpp', 'codegen/nv50_ir_print.cpp', 'codegen/nv50_ir_ra.cpp', + 'codegen/nv50_ir_serialize.cpp', 'codegen/nv50_ir_ssa.cpp', 'codegen/nv50_ir_target.cpp', 'codegen/nv50_ir_target.h', -- 2.30.2