nvc0: fix submission of VertexID and EdgeFlag in push mode
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
28 const uint8_t Target::operationSrcNr[OP_LAST + 1] =
29 {
30 0, 0, // NOP, PHI
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34 1, 1, 1, // ABS, NEG, NOT
35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
36 2, 2, 1, // MAX, MIN, SAT
37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
42 0, 0, 0, // PRERET,CONT,BREAK
43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
45 1, 1, // EMIT, RESTART
46 1, 1, 1, // TEX, TXB, TXL,
47 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
48 1, 2, // SULD, SUST
49 1, 1, // DFDX, DFDY
50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
51 2, 3, 2, // POPCNT, INSBF, EXTBF
52 0
53 };
54
55
56 extern Target *getTargetNVC0(unsigned int chipset);
57
58 Target *Target::create(unsigned int chipset)
59 {
60 switch (chipset & 0xf0) {
61 case 0xc0:
62 case 0xd0:
63 return getTargetNVC0(chipset);
64 case 0x50:
65 case 0x80:
66 case 0x90:
67 case 0xa0:
68 default:
69 ERROR("unsupported target: NV%x\n", chipset);
70 return 0;
71 }
72 }
73
74 void Target::destroy(Target *targ)
75 {
76 delete targ;
77 }
78
79 void
80 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
81 {
82 code = reinterpret_cast<uint32_t *>(ptr);
83 codeSize = 0;
84 codeSizeLimit = size;
85 }
86
87 void
88 CodeEmitter::printBinary() const
89 {
90 uint32_t *bin = code - codeSize / 4;
91 INFO("program binary (%u bytes)", codeSize);
92 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
93 if ((pos % 8) == 0)
94 INFO("\n");
95 INFO("%08x ", bin[pos]);
96 }
97 INFO("\n");
98 }
99
100 void
101 CodeEmitter::prepareEmission(Program *prog)
102 {
103 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
104 !fi.end(); fi.next()) {
105 Function *func = reinterpret_cast<Function *>(fi.get());
106 func->binPos = prog->binSize;
107 prepareEmission(func);
108 prog->binSize += func->binSize;
109 }
110 }
111
112 void
113 CodeEmitter::prepareEmission(Function *func)
114 {
115 func->bbCount = 0;
116 func->bbArray = new BasicBlock * [func->cfg.getSize()];
117
118 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
119
120 Graph::GraphIterator *iter;
121 for (iter = func->cfg.iteratorCFG(); !iter->end(); iter->next())
122 prepareEmission(BasicBlock::get(*iter));
123 func->cfg.putIterator(iter);
124 }
125
// Prepare a single basic block for emission:
//  1. remove OP_BRA exits of already placed blocks that target bb and fix up
//     the sizes/positions affected by that removal,
//  2. assign bb->binPos and append bb to func->bbArray,
//  3. choose an encoding size (4 or 8 bytes) for each instruction of bb and
//     accumulate the result in bb->binSize and func->binSize.
126 void
127 CodeEmitter::prepareEmission(BasicBlock *bb)
128 {
129 Instruction *i, *next;
130 Function *func = bb->getFunction();
131 int j;
// nShort counts the current run of instructions given a short (< 8 byte)
// encoding; the code below reacts when that count is odd.
// NOTE(review): presumably short encodings have to come in pairs -- confirm.
132 unsigned int nShort;
133
// Skip backwards over already placed blocks whose binSize is zero.
134 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
135
136 for (; j >= 0; --j) {
137 BasicBlock *in = func->bbArray[j];
138 Instruction *exit = in->getExit();
139
// A branch from a placed block to bb is dropped here: 8 bytes are taken off
// both the block and the function size.
140 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
141 in->binSize -= 8;
142 func->binSize -= 8;
143
// Every block placed after 'in' moves up by the 8 removed bytes. This inner
// loop leaves j == func->bbCount, so the outer --j continues from the most
// recently placed block.
144 for (++j; j < func->bbCount; ++j)
145 func->bbArray[j]->binPos -= 8;
146
147 in->remove(exit);
148 }
149 bb->binPos = in->binPos + in->binSize;
150 if (in->binSize) // no more no-op branches to bb
151 break;
152 }
153 func->bbArray[func->bbCount++] = bb;
154
// A block without an exit instruction is not sized; note that func->binSize
// is not touched on this path.
155 if (!bb->getExit())
156 return;
157
158 // determine encoding size, try to group short instructions
159 nShort = 0;
160 for (i = bb->getEntry(); i; i = next) {
161 next = i->next;
162
163 i->encSize = getMinEncodingSize(i);
164 if (next && i->encSize < 8)
165 ++nShort;
166 else
// i is not counted as short here, the run of shorts before it is odd, and
// the following instruction could be encoded in 4 bytes: try to reorder so
// that the following instruction joins the run.
167 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
168 if (i->isCommutationLegal(i->next)) {
// Swap i with its successor; the former successor is sized as short and
// accounted for below, and i is revisited on the next iteration.
169 bb->permuteAdjacent(i, next);
170 next->encSize = 4;
171 next = i;
172 i = i->prev;
173 ++nShort;
174 } else
175 if (i->isCommutationLegal(i->prev) && next->next) {
// Swap i with its predecessor instead; 'next' is sized as short right here
// (its 4 bytes are added immediately) and skipped by the loop.
176 bb->permuteAdjacent(i->prev, i);
177 next->encSize = 4;
178 next = next->next;
179 bb->binSize += 4;
180 ++nShort;
181 } else {
// No legal reordering: widen both i and the preceding instruction to 8 bytes.
182 i->encSize = 8;
183 i->prev->encSize = 8;
184 bb->binSize += 4;
185 nShort = 0;
186 }
187 } else {
// Ends the current run: i gets the long encoding, and if the run was odd the
// preceding instruction is widened from 4 to 8 bytes as well.
188 i->encSize = 8;
189 if (nShort & 1) {
190 i->prev->encSize = 8;
191 bb->binSize += 4;
192 }
193 nShort = 0;
194 }
195 bb->binSize += i->encSize;
196 }
197
// The exit instruction must end up with the 8 byte encoding (see the assert
// below); widen it, and its predecessor too when that one is short and the
// run length is even.
198 if (bb->getExit()->encSize == 4) {
199 assert(nShort);
200 bb->getExit()->encSize = 8;
201 bb->binSize += 4;
202
203 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
204 bb->binSize += 8;
205 bb->getExit()->prev->encSize = 8;
206 }
207 }
208 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
209
210 func->binSize += bb->binSize;
211 }
212
213 bool
214 Program::emitBinary(struct nv50_ir_prog_info *info)
215 {
216 CodeEmitter *emit = target->getCodeEmitter(progType);
217
218 emit->prepareEmission(this);
219
220 if (dbgFlags & NV50_IR_DEBUG_BASIC)
221 this->print();
222
223 if (!binSize) {
224 code = NULL;
225 return false;
226 }
227 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
228 if (!code)
229 return false;
230 emit->setCodeLocation(code, binSize);
231
232 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
233 Function *fn = reinterpret_cast<Function *>(fi.get());
234
235 assert(emit->getCodeSize() == fn->binPos);
236
237 for (int b = 0; b < fn->bbCount; ++b)
238 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
239 emit->emitInstruction(i);
240 }
241 info->bin.relocData = emit->getRelocInfo();
242
243 delete emit;
244 return true;
245 }
246
247 #define RELOC_ALLOC_INCREMENT 8
248
249 bool
250 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
251 int s)
252 {
253 unsigned int n = relocInfo ? relocInfo->count : 0;
254
255 if (!(n % RELOC_ALLOC_INCREMENT)) {
256 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
257 relocInfo = reinterpret_cast<RelocInfo *>(
258 REALLOC(relocInfo, n ? size : 0,
259 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
260 if (!relocInfo)
261 return false;
262 if (n == 0)
263 memset(relocInfo, 0, sizeof(RelocInfo));
264 }
265 ++relocInfo->count;
266
267 relocInfo->entry[n].data = data;
268 relocInfo->entry[n].mask = m;
269 relocInfo->entry[n].offset = codeSize + w * 4;
270 relocInfo->entry[n].bitPos = s;
271 relocInfo->entry[n].type = ty;
272
273 return true;
274 }
275
276 void
277 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
278 {
279 uint32_t value = 0;
280
281 switch (type) {
282 case TYPE_CODE: value = info->codePos; break;
283 case TYPE_BUILTIN: value = info->libPos; break;
284 case TYPE_DATA: value = info->dataPos; break;
285 default:
286 assert(0);
287 break;
288 }
289 value += data;
290 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
291
292 binary[offset / 4] &= ~mask;
293 binary[offset / 4] |= value & mask;
294 }
295
296 } // namespace nv50_ir
297
298
299 #include "nv50/codegen/nv50_ir_driver.h"
300
301 extern "C" {
302
303 void
304 nv50_ir_relocate_code(void *relocData, uint32_t *code,
305 uint32_t codePos,
306 uint32_t libPos,
307 uint32_t dataPos)
308 {
309 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
310
311 info->codePos = codePos;
312 info->libPos = libPos;
313 info->dataPos = dataPos;
314
315 for (unsigned int i = 0; i < info->count; ++i)
316 info->entry[i].apply(code, info);
317 }
318
319 void
320 nv50_ir_get_target_library(uint32_t chipset,
321 const uint32_t **code, uint32_t *size)
322 {
323 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
324 targ->getBuiltinCode(code, size);
325 nv50_ir::Target::destroy(targ);
326 }
327
328 }