From 56d40aa51b34b77791cc3a49d7e86473a7459b72 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 27 Mar 2012 17:30:31 +0200 Subject: [PATCH] nv50/ir: Decouple DataArray from the dictionary that maps locations to values. The point is to keep an independent dictionary for each function. The array that was being used as dictionary has been converted into a "bimap" for two different reasons: first, because having an almost empty instance of an array with as many entries as registers there are in the program, once for every function, would be wasteful, and second, because we want to be able to map Value pointers back to locations at some point. --- .../nv50/codegen/nv50_ir_build_util.cpp | 142 +++++------- .../drivers/nv50/codegen/nv50_ir_build_util.h | 79 +++++-- .../nv50/codegen/nv50_ir_from_tgsi.cpp | 213 ++++++++---------- .../drivers/nv50/codegen/nv50_ir_util.h | 25 +- 4 files changed, 236 insertions(+), 223 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index bdde9eaa904..cb63854b043 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -27,7 +27,19 @@ namespace nv50_ir { BuildUtil::BuildUtil() { - prog = NULL; + init(NULL); +} + +BuildUtil::BuildUtil(Program *prog) +{ + init(prog); +} + +void +BuildUtil::init(Program *prog) +{ + this->prog = prog; + func = NULL; bb = NULL; pos = NULL; @@ -417,127 +429,91 @@ BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex) } void -BuildUtil::DataArray::init() -{ - values = NULL; - baseAddr = 0; - arrayLen = 0; - - vecDim = 4; - eltSize = 2; - - file = FILE_GPR; - regOnly = true; -} - -BuildUtil::DataArray::DataArray() -{ - init(); -} - -BuildUtil::DataArray::DataArray(BuildUtil *bld) : up(bld) -{ - init(); -} - -BuildUtil::DataArray::~DataArray() -{ - if (values) - delete[] values; -} - -void -BuildUtil::DataArray::setup(uint32_t base, int len, 
int v, int size, - DataFile f, int8_t fileIndex) -{ - baseAddr = base; - arrayLen = len; - - vecDim = v; - eltSize = size; - - file = f; - regOnly = !isMemoryFile(f); - - values = new Value * [arrayLen * vecDim]; - if (values) - memset(values, 0, arrayLen * vecDim * sizeof(Value *)); +BuildUtil::DataArray::setup(unsigned array, unsigned arrayIdx, + uint32_t base, int len, int vecDim, int eltSize, + DataFile file, int8_t fileIdx) +{ + this->array = array; + this->arrayIdx = arrayIdx; + this->baseAddr = base; + this->arrayLen = len; + this->vecDim = vecDim; + this->eltSize = eltSize; + this->file = file; + this->regOnly = !isMemoryFile(file); if (!regOnly) { - baseSym = new_Symbol(up->getProgram(), file, fileIndex); + baseSym = new_Symbol(up->getProgram(), file, fileIdx); baseSym->setOffset(baseAddr); - baseSym->reg.size = size; + baseSym->reg.size = eltSize; + } else { + baseSym = NULL; } } Value * -BuildUtil::DataArray::acquire(int i, int c) +BuildUtil::DataArray::acquire(ValueMap &m, int i, int c) { - const unsigned int idx = i * vecDim + c; - - assert(idx < arrayLen * vecDim); - if (regOnly) { - const unsigned int idx = i * 4 + c; // vecDim always 4 if regOnly - if (!values[idx]) - values[idx] = new_LValue(up->getFunction(), file); - return values[idx]; + Value *v = lookup(m, i, c); + if (!v) + v = insert(m, i, c, new_LValue(up->getFunction(), file)); + + return v; } else { return up->getScratch(); } } Value * -BuildUtil::DataArray::load(int i, int c, Value *ptr) +BuildUtil::DataArray::load(ValueMap &m, int i, int c, Value *ptr) { - const unsigned int idx = i * vecDim + c; - - assert(idx < arrayLen * vecDim); - if (regOnly) { - if (!values[idx]) - values[idx] = new_LValue(up->getFunction(), file); - return values[idx]; + Value *v = lookup(m, i, c); + if (!v) + v = insert(m, i, c, new_LValue(up->getFunction(), file)); + + return v; } else { - Symbol *sym = reinterpret_cast(values[idx]); + Value *sym = lookup(m, i, c); if (!sym) - values[idx] = sym = 
this->mkSymbol(i, c, baseSym); - return up->mkLoad(typeOfSize(eltSize), sym, ptr); + sym = insert(m, i, c, mkSymbol(i, c)); + + return up->mkLoad(typeOfSize(eltSize), static_cast(sym), ptr); } } void -BuildUtil::DataArray::store(int i, int c, Value *ptr, Value *value) +BuildUtil::DataArray::store(ValueMap &m, int i, int c, Value *ptr, Value *value) { - const unsigned int idx = i * vecDim + c; - - assert(idx < arrayLen * vecDim); - if (regOnly) { assert(!ptr); - assert(!values[idx] || values[idx] == value); - values[idx] = value; + if (!lookup(m, i, c)) + insert(m, i, c, value); + + assert(lookup(m, i, c) == value); } else { - Symbol *sym = reinterpret_cast(values[idx]); + Value *sym = lookup(m, i, c); if (!sym) - values[idx] = sym = this->mkSymbol(i, c, baseSym); - up->mkStore(OP_STORE, typeOfSize(value->reg.size), sym, ptr, value); + sym = insert(m, i, c, mkSymbol(i, c)); + + const DataType stTy = typeOfSize(value->reg.size); + + up->mkStore(OP_STORE, stTy, static_cast(sym), ptr, value); } } Symbol * -BuildUtil::DataArray::mkSymbol(int i, int c, Symbol *base) +BuildUtil::DataArray::mkSymbol(int i, int c) { const unsigned int idx = i * vecDim + c; - Symbol *sym = new_Symbol(up->getProgram(), file, 0); - assert(base || (idx < arrayLen && c < vecDim)); + assert(baseSym || (idx < arrayLen && c < vecDim)); sym->reg.size = eltSize; sym->reg.type = typeOfSize(eltSize); - - sym->setAddress(base, baseAddr + idx * eltSize); + sym->setAddress(baseSym, baseAddr + idx * eltSize); return sym; } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index f815cf06759..69158861533 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -29,6 +29,7 @@ class BuildUtil { public: BuildUtil(); + BuildUtil(Program *); inline void setProgram(Program *); inline Program *getProgram() const { return prog; } @@ -94,29 +95,58 @@ public: Value 
*loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); } + struct Location + { + Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c) + : array(array), arrayIdx(arrayIdx), i(i), c(c) { } + Location(const Location &l) + : array(l.array), arrayIdx(l.arrayIdx), i(l.i), c(l.c) { } + + bool operator==(const Location &l) const + { + return + array == l.array && arrayIdx == l.arrayIdx && i == l.i && c == l.c; + } + + bool operator<(const Location &l) const + { + return array != l.array ? array < l.array : + arrayIdx != l.arrayIdx ? arrayIdx < l.arrayIdx : + i != l.i ? i < l.i : + c != l.c ? c < l.c : + false; + } + + unsigned array, arrayIdx, i, c; + }; + + typedef bimap ValueMap; + class DataArray { public: - DataArray(); - DataArray(BuildUtil *); - ~DataArray(); - - inline void setParent(BuildUtil *bld) { assert(!up); up = bld; } + DataArray(BuildUtil *bld) : up(bld) { } - void setup(uint32_t base, int len, int vecDim, int size, - DataFile, int8_t fileIndex = 0); + void setup(unsigned array, unsigned arrayIdx, + uint32_t base, int len, int vecDim, int eltSize, + DataFile file, int8_t fileIdx); - inline bool exists(unsigned int i, unsigned int c); + inline bool exists(ValueMap&, unsigned int i, unsigned int c); - Value *load(int i, int c, Value *ptr); - void store(int i, int c, Value *ptr, Value *value); - Value *acquire(int i, int c); + Value *load(ValueMap&, int i, int c, Value *ptr); + void store(ValueMap&, int i, int c, Value *ptr, Value *value); + Value *acquire(ValueMap&, int i, int c); private: - Symbol *mkSymbol(int i, int c, Symbol *base); + inline Value *lookup(ValueMap&, unsigned i, unsigned c); + inline Value *insert(ValueMap&, unsigned i, unsigned c, Value *v); + + Symbol *mkSymbol(int i, int c); private: - Value **values; + BuildUtil *up; + unsigned array, arrayIdx; + uint32_t baseAddr; uint32_t arrayLen; Symbol *baseSym; @@ -126,10 +156,6 @@ public: DataFile file; bool regOnly; - - BuildUtil *up; - - void init(); }; Symbol 
*mkSymbol(DataFile file, int8_t fileIndex, @@ -138,6 +164,7 @@ public: Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex); private: + void init(Program *); void addImmediate(ImmediateValue *); inline unsigned int u32Hash(uint32_t); @@ -256,10 +283,24 @@ BuildUtil::mkOp3v(operation op, DataType ty, Value *dst, } bool -BuildUtil::DataArray::exists(unsigned int i, unsigned int c) +BuildUtil::DataArray::exists(ValueMap &m, unsigned int i, unsigned int c) { assert(i < arrayLen && c < vecDim); - return !regOnly || values[i * vecDim + c]; + return !regOnly || m.r.count(Location(array, arrayIdx, i, c)); +} + +Value * +BuildUtil::DataArray::lookup(ValueMap &m, unsigned i, unsigned c) +{ + ValueMap::r_iterator it = m.r.find(Location(array, arrayIdx, i, c)); + return it != m.r.end() ? it->second : NULL; +} + +Value * +BuildUtil::DataArray::insert(ValueMap &m, unsigned i, unsigned c, Value *v) +{ + m.insert(Location(array, arrayIdx, i, c), v); + return v; } } // namespace nv50_ir diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index bc602f4ee9f..9f735bfb2e2 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -1029,6 +1029,7 @@ public: private: Value *getVertexBase(int s); + DataArray *getArrayForFile(unsigned file, int idx); Value *fetchSrc(int s, int c); Value *acquireDst(int d, int c); void storeDst(int d, int c, Value *); @@ -1078,8 +1079,10 @@ private: DataArray aData; // TGSI_FILE_ADDRESS DataArray pData; // TGSI_FILE_PREDICATE DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) - DataArray *lData; // TGSI_FILE_TEMPORARY_ARRAY - DataArray *iData; // TGSI_FILE_IMMEDIATE_ARRAY + std::vector lData; // TGSI_FILE_TEMPORARY_ARRAY + std::vector iData; // TGSI_FILE_IMMEDIATE_ARRAY + + ValueMap values; Value *zero; Value *fragCoord[4]; @@ -1252,34 +1255,44 @@ Converter::fetchSrc(int s, int c) return 
applySrcMod(res, s, c); } -Value * -Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) +Converter::DataArray * +Converter::getArrayForFile(unsigned file, int idx) { - const int idx = src.getIndex(0); - const int swz = src.getSwizzle(c); - - switch (src.getFile()) { + switch (file) { case TGSI_FILE_TEMPORARY: - return tData.load(idx, swz, ptr); + return &tData; case TGSI_FILE_PREDICATE: - return pData.load(idx, swz, ptr); + return &pData; case TGSI_FILE_ADDRESS: - return aData.load(idx, swz, ptr); - + return &aData; case TGSI_FILE_TEMPORARY_ARRAY: - assert(src.is2D() && src.getIndex(1) < code->tempArrayCount); - return lData[src.getIndex(1)].load(idx, swz, ptr); + assert(idx < code->tempArrayCount); + return &lData[idx]; case TGSI_FILE_IMMEDIATE_ARRAY: - assert(src.is2D() && src.getIndex(1) < code->immdArrayCount); - return iData[src.getIndex(1)].load(idx, swz, ptr); + assert(idx < code->immdArrayCount); + return &iData[idx]; + case TGSI_FILE_OUTPUT: + assert(prog->getType() == Program::TYPE_FRAGMENT); + return &oData; + default: + assert(!"invalid/unhandled TGSI source file"); + return NULL; + } +} +Value * +Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) +{ + const int idx2d = src.is2D() ? 
src.getIndex(1) : 0; + const int idx = src.getIndex(0); + const int swz = src.getSwizzle(c); + + switch (src.getFile()) { case TGSI_FILE_IMMEDIATE: assert(!ptr); return loadImm(NULL, info->immd.data[idx * 4 + swz]); - case TGSI_FILE_CONSTANT: return mkLoad(TYPE_U32, srcToSym(src, c), ptr); - case TGSI_FILE_INPUT: if (prog->getType() == Program::TYPE_FRAGMENT) { // don't load masked inputs, won't be assigned a slot @@ -1290,18 +1303,14 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) return interpolate(src, c, ptr); } return mkLoad(TYPE_U32, srcToSym(src, c), ptr); - + case TGSI_FILE_OUTPUT: + assert(!"load from output file"); + return NULL; case TGSI_FILE_SYSTEM_VALUE: assert(!ptr); return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); - - case TGSI_FILE_OUTPUT: - case TGSI_FILE_RESOURCE: - case TGSI_FILE_SAMPLER: - case TGSI_FILE_NULL: default: - assert(!"invalid/unhandled TGSI source file"); - return NULL; + return getArrayForFile(src.getFile(), idx2d)->load(values, idx, swz, ptr); } } @@ -1309,35 +1318,20 @@ Value * Converter::acquireDst(int d, int c) { const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); - - if (dst.isMasked(c)) - return NULL; - if (dst.isIndirect(0)) - return getScratch(); - + const unsigned f = dst.getFile(); const int idx = dst.getIndex(0); + const int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; - switch (dst.getFile()) { - case TGSI_FILE_TEMPORARY: - return tData.acquire(idx, c); - case TGSI_FILE_TEMPORARY_ARRAY: - return getScratch(); - case TGSI_FILE_PREDICATE: - return pData.acquire(idx, c); - case TGSI_FILE_ADDRESS: - return aData.acquire(idx, c); + if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE) + return NULL; - case TGSI_FILE_OUTPUT: - if (prog->getType() == Program::TYPE_FRAGMENT) - return oData.acquire(idx, c); - // fall through - case TGSI_FILE_SYSTEM_VALUE: + if (dst.isIndirect(0) || + f == TGSI_FILE_TEMPORARY_ARRAY || + f == TGSI_FILE_SYSTEM_VALUE || + (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) return getScratch(); - default: - assert(!"invalid dst file"); - return NULL; - } + return getArrayForFile(f, idx2d)-> acquire(values, idx, c); } void @@ -1377,38 +1371,25 @@ void Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, Value *val, Value *ptr) { + const unsigned f = dst.getFile(); const int idx = dst.getIndex(0); + const int idx2d = dst.is2D() ? 
dst.getIndex(1) : 0; - switch (dst.getFile()) { - case TGSI_FILE_TEMPORARY: - tData.store(idx, c, ptr, val); - break; - case TGSI_FILE_TEMPORARY_ARRAY: - assert(dst.is2D() && dst.getIndex(1) < code->tempArrayCount); - lData[dst.getIndex(1)].store(idx, c, ptr, val); - break; - case TGSI_FILE_PREDICATE: - pData.store(idx, c, ptr, val); - break; - case TGSI_FILE_ADDRESS: - aData.store(idx, c, ptr, val); - break; - - case TGSI_FILE_OUTPUT: - if (prog->getType() == Program::TYPE_FRAGMENT) - oData.store(idx, c, ptr, val); - else - mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); - break; - - case TGSI_FILE_SYSTEM_VALUE: + if (f == TGSI_FILE_SYSTEM_VALUE) { assert(!ptr); mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); - break; - - default: + } else + if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + } else + if (f == TGSI_FILE_TEMPORARY || + f == TGSI_FILE_TEMPORARY_ARRAY || + f == TGSI_FILE_PREDICATE || + f == TGSI_FILE_ADDRESS || + f == TGSI_FILE_OUTPUT) { + getArrayForFile(f, idx2d)->store(values, idx, c, ptr, val); + } else { assert(!"invalid dst file"); - break; } } @@ -2237,34 +2218,57 @@ Converter::exportOutputs() { for (unsigned int i = 0; i < info->numOutputs; ++i) { for (unsigned int c = 0; c < 4; ++c) { - if (!oData.exists(i, c)) + if (!oData.exists(values, i, c)) continue; Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[i].slot[c] * 4); - Value *val = oData.load(i, c, NULL); + Value *val = oData.load(values, i, c, NULL); if (val) mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); } } } -Converter::Converter(Program *ir, const tgsi::Source *src) - : code(src), +Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), + code(code), tgsi(NULL), tData(this), aData(this), pData(this), oData(this) { - prog = ir; info = code->info; - DataFile tFile = code->mainTempsInLMem ? 
FILE_MEMORY_LOCAL : FILE_GPR; + const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR; + + const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); + const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); + const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); + const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); + + tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); + pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); + aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); + oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); + + for (int vol = 0, i = 0; i < code->tempArrayCount; ++i) { + int len = code->tempArrays[i].u32 >> 2; + int dim = code->tempArrays[i].u32 & 3; + + lData.push_back(DataArray(this)); + lData.back().setup(TGSI_FILE_TEMPORARY_ARRAY, i, vol, len, dim, 4, + FILE_MEMORY_LOCAL, 0); - tData.setup(0, code->fileSize(TGSI_FILE_TEMPORARY), 4, 4, tFile); - pData.setup(0, code->fileSize(TGSI_FILE_PREDICATE), 4, 4, FILE_PREDICATE); - aData.setup(0, code->fileSize(TGSI_FILE_ADDRESS), 4, 4, FILE_ADDRESS); - oData.setup(0, code->fileSize(TGSI_FILE_OUTPUT), 4, 4, FILE_GPR); + vol += (len * dim * 4 + 0xf) & ~0xf; + } + + for (int vol = 0, i = 0; i < code->immdArrayCount; ++i) { + int len = code->immdArrays[i].u32 >> 2; + int dim = code->immdArrays[i].u32 & 3; + + iData.push_back(DataArray(this)); + iData.back().setup(TGSI_FILE_IMMEDIATE_ARRAY, i, vol, len, dim, 4, + FILE_MEMORY_CONST, 14); - lData = NULL; - iData = NULL; + vol += (len * dim * 4 + 0xf) & ~0xf; + } zero = mkImm((uint32_t)0); @@ -2273,10 +2277,6 @@ Converter::Converter(Program *ir, const tgsi::Source *src) Converter::~Converter() { - if (lData) - delete[] lData; - if (iData) - delete[] iData; } bool @@ -2285,33 +2285,6 @@ Converter::run() BasicBlock *entry = new BasicBlock(prog->main); BasicBlock *leave = new BasicBlock(prog->main); - if (code->tempArrayCount && !lData) { - uint32_t volume = 0; - lData 
= new DataArray[code->tempArrayCount]; - if (!lData) - return false; - for (int i = 0; i < code->tempArrayCount; ++i) { - int len = code->tempArrays[i].u32 >> 2; - int dim = code->tempArrays[i].u32 & 3; - lData[i].setParent(this); - lData[i].setup(volume, len, dim, 4, FILE_MEMORY_LOCAL); - volume += (len * dim * 4 + 0xf) & ~0xf; - } - } - if (code->immdArrayCount && !iData) { - uint32_t volume = 0; - iData = new DataArray[code->immdArrayCount]; - if (!iData) - return false; - for (int i = 0; i < code->immdArrayCount; ++i) { - int len = code->immdArrays[i].u32 >> 2; - int dim = code->immdArrays[i].u32 & 3; - iData[i].setParent(this); - iData[i].setup(volume, len, dim, 4, FILE_MEMORY_CONST, 14); - volume += (len * dim * 4 + 0xf) & ~0xf; - } - } - prog->main->setEntry(entry); prog->main->setExit(leave); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h index dc6be9f3af8..262cf9cd496 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_util.h @@ -26,8 +26,8 @@ #include #include #include -#include #include +#include #ifndef NDEBUG # include @@ -704,6 +704,29 @@ protected: } }; +template +struct bimap +{ + std::map forth; + std::map back; + +public: + bimap() : l(back), r(forth) { } + bimap(const bimap &m) + : forth(m.forth), back(m.back), l(back), r(forth) { } + + void insert(const S &s, const T &t) + { + forth.insert(std::make_pair(s, t)); + back.insert(std::make_pair(t, s)); + } + + typedef typename std::map::const_iterator l_iterator; + const std::map &l; + typedef typename std::map::const_iterator r_iterator; + const std::map &r; +}; + } // namespace nv50_ir #endif // __NV50_IR_UTIL_H__ -- 2.30.2