9193a01f189874a7fb384529a4f34fbc9148a452
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
28 const uint8_t Target::operationSrcNr[] =
29 {
30 0, 0, // NOP, PHI
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34 3, 3, // SHLADD, XMAD
35 1, 1, 1, // ABS, NEG, NOT
36 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
37 2, 2, 1, // MAX, MIN, SAT
38 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
39 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
40 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
41 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
42 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
43 0, 0, 0, // PRERET,CONT,BREAK
44 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
45 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
46 1, 1, // EMIT, RESTART
47 1, 1, 1, // TEX, TXB, TXL,
48 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
49 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
50 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
51 0, // TEXBAR
52 1, 1, // DFDX, DFDY
53 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
54 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
55 2, 2, // ATOM, BAR
56 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
57 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
58 3, // SHFL
59 1, // VOTE
60 1, // BUFQ
61 0
62 };
63
64 const OpClass Target::operationClass[] =
65 {
66 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
67 OPCLASS_OTHER,
68 OPCLASS_PSEUDO,
69 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
70 // MOV; LOAD; STORE
71 OPCLASS_MOVE,
72 OPCLASS_LOAD,
73 OPCLASS_STORE,
74 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
75 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
76 OPCLASS_ARITH, OPCLASS_ARITH,
77 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
78 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
79 OPCLASS_CONVERT, OPCLASS_CONVERT,
80 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
81 OPCLASS_SHIFT, OPCLASS_SHIFT,
82 // MAX, MIN
83 OPCLASS_COMPARE, OPCLASS_COMPARE,
84 // SAT, CEIL, FLOOR, TRUNC; CVT
85 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
86 OPCLASS_CONVERT,
87 // SET(AND,OR,XOR); SELP, SLCT
88 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
89 OPCLASS_COMPARE, OPCLASS_COMPARE,
90 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
91 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
92 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
93 OPCLASS_SFU, OPCLASS_SFU,
94 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
95 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
96 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
97 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
98 // DISCARD, EXIT
99 OPCLASS_FLOW, OPCLASS_FLOW,
100 // MEMBAR
101 OPCLASS_CONTROL,
102 // VFETCH, PFETCH, AFETCH, EXPORT
103 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
104 // LINTERP, PINTERP
105 OPCLASS_SFU, OPCLASS_SFU,
106 // EMIT, RESTART
107 OPCLASS_CONTROL, OPCLASS_CONTROL,
108 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
109 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
110 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
111 OPCLASS_TEXTURE, OPCLASS_TEXTURE,
112 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
113 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
114 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
115 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
116 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
117 // TEXBAR
118 OPCLASS_OTHER,
119 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
120 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
121 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
122 // POPCNT, INSBF, EXTBF, BFIND; PERMT
123 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
124 OPCLASS_BITFIELD,
125 // ATOM, BAR
126 OPCLASS_ATOMIC, OPCLASS_CONTROL,
127 // VADD, VAVG, VMIN, VMAX
128 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
129 // VSAD, VSET, VSHR, VSHL
130 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
131 // VSEL, CCTL
132 OPCLASS_VECTOR, OPCLASS_CONTROL,
133 // SHFL
134 OPCLASS_OTHER,
135 // VOTE
136 OPCLASS_OTHER,
137 // BUFQ
138 OPCLASS_OTHER,
139 OPCLASS_PSEUDO // LAST
140 };
141
142
143 extern Target *getTargetGM107(unsigned int chipset);
144 extern Target *getTargetNVC0(unsigned int chipset);
145 extern Target *getTargetNV50(unsigned int chipset);
146
147 Target *Target::create(unsigned int chipset)
148 {
149 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
150 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
151 switch (chipset & ~0xf) {
152 case 0x110:
153 case 0x120:
154 case 0x130:
155 return getTargetGM107(chipset);
156 case 0xc0:
157 case 0xd0:
158 case 0xe0:
159 case 0xf0:
160 case 0x100:
161 return getTargetNVC0(chipset);
162 case 0x50:
163 case 0x80:
164 case 0x90:
165 case 0xa0:
166 return getTargetNV50(chipset);
167 default:
168 ERROR("unsupported target: NV%x\n", chipset);
169 return 0;
170 }
171 }
172
173 void Target::destroy(Target *targ)
174 {
175 delete targ;
176 }
177
178 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
179 {
180 }
181
182 void
183 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
184 {
185 code = reinterpret_cast<uint32_t *>(ptr);
186 codeSize = 0;
187 codeSizeLimit = size;
188 }
189
190 void
191 CodeEmitter::printBinary() const
192 {
193 uint32_t *bin = code - codeSize / 4;
194 INFO("program binary (%u bytes)", codeSize);
195 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
196 if ((pos % 8) == 0)
197 INFO("\n");
198 INFO("%08x ", bin[pos]);
199 }
200 INFO("\n");
201 }
202
/**
 * Number of 56-byte groups needed to hold @size bytes, rounded up.
 *
 * The caller reserves 8 extra bytes per returned group when laying out code
 * for targets with software scheduling.
 *
 * Computed as quotient plus remainder test instead of (size + 55) / 56 so the
 * result is correct for every uint32_t input (the addition could wrap).
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   return size / 56 + ((size % 56) ? 1 : 0);
}
207
208 void
209 CodeEmitter::prepareEmission(Program *prog)
210 {
211 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
212 !fi.end(); fi.next()) {
213 Function *func = reinterpret_cast<Function *>(fi.get());
214 func->binPos = prog->binSize;
215 prepareEmission(func);
216
217 // adjust sizes & positions for schedulding info:
218 if (prog->getTarget()->hasSWSched) {
219 uint32_t adjPos = func->binPos;
220 BasicBlock *bb = NULL;
221 for (int i = 0; i < func->bbCount; ++i) {
222 bb = func->bbArray[i];
223 int32_t adjSize = bb->binSize;
224 if (adjPos % 64) {
225 adjSize -= 64 - adjPos % 64;
226 if (adjSize < 0)
227 adjSize = 0;
228 }
229 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
230 bb->binPos = adjPos;
231 bb->binSize = adjSize;
232 adjPos += adjSize;
233 }
234 if (bb)
235 func->binSize = adjPos - func->binPos;
236 }
237
238 prog->binSize += func->binSize;
239 }
240 }
241
242 void
243 CodeEmitter::prepareEmission(Function *func)
244 {
245 func->bbCount = 0;
246 func->bbArray = new BasicBlock * [func->cfg.getSize()];
247
248 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
249
250 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
251 prepareEmission(BasicBlock::get(*it));
252 }
253
254 void
255 CodeEmitter::prepareEmission(BasicBlock *bb)
256 {
257 Instruction *i, *next;
258 Function *func = bb->getFunction();
259 int j;
260 unsigned int nShort;
261
262 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
263
264 for (; j >= 0; --j) {
265 BasicBlock *in = func->bbArray[j];
266 Instruction *exit = in->getExit();
267
268 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
269 in->binSize -= 8;
270 func->binSize -= 8;
271
272 for (++j; j < func->bbCount; ++j)
273 func->bbArray[j]->binPos -= 8;
274
275 in->remove(exit);
276 }
277 bb->binPos = in->binPos + in->binSize;
278 if (in->binSize) // no more no-op branches to bb
279 break;
280 }
281 func->bbArray[func->bbCount++] = bb;
282
283 if (!bb->getExit())
284 return;
285
286 // determine encoding size, try to group short instructions
287 nShort = 0;
288 for (i = bb->getEntry(); i; i = next) {
289 next = i->next;
290
291 if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
292 bb->remove(i);
293 continue;
294 }
295
296 i->encSize = getMinEncodingSize(i);
297 if (next && i->encSize < 8)
298 ++nShort;
299 else
300 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
301 if (i->isCommutationLegal(i->next)) {
302 bb->permuteAdjacent(i, next);
303 next->encSize = 4;
304 next = i;
305 i = i->prev;
306 ++nShort;
307 } else
308 if (i->isCommutationLegal(i->prev) && next->next) {
309 bb->permuteAdjacent(i->prev, i);
310 next->encSize = 4;
311 next = next->next;
312 bb->binSize += 4;
313 ++nShort;
314 } else {
315 i->encSize = 8;
316 i->prev->encSize = 8;
317 bb->binSize += 4;
318 nShort = 0;
319 }
320 } else {
321 i->encSize = 8;
322 if (nShort & 1) {
323 i->prev->encSize = 8;
324 bb->binSize += 4;
325 }
326 nShort = 0;
327 }
328 bb->binSize += i->encSize;
329 }
330
331 if (bb->getExit()->encSize == 4) {
332 assert(nShort);
333 bb->getExit()->encSize = 8;
334 bb->binSize += 4;
335
336 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
337 bb->binSize += 8;
338 bb->getExit()->prev->encSize = 8;
339 }
340 }
341 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
342
343 func->binSize += bb->binSize;
344 }
345
346 void
347 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
348 {
349 unsigned int n = 0, nMax = allFuncs.getSize();
350
351 info->bin.syms =
352 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
353
354 for (ArrayList::Iterator fi = allFuncs.iterator();
355 !fi.end();
356 fi.next(), ++n) {
357 Function *f = (Function *)fi.get();
358 assert(n < nMax);
359
360 info->bin.syms[n].label = f->getLabel();
361 info->bin.syms[n].offset = f->binPos;
362 }
363
364 info->bin.numSyms = n;
365 }
366
367 bool
368 Program::emitBinary(struct nv50_ir_prog_info *info)
369 {
370 CodeEmitter *emit = target->getCodeEmitter(progType);
371
372 emit->prepareEmission(this);
373
374 if (dbgFlags & NV50_IR_DEBUG_BASIC)
375 this->print();
376
377 if (!binSize) {
378 code = NULL;
379 return false;
380 }
381 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
382 if (!code)
383 return false;
384 emit->setCodeLocation(code, binSize);
385 info->bin.instructions = 0;
386
387 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
388 Function *fn = reinterpret_cast<Function *>(fi.get());
389
390 assert(emit->getCodeSize() == fn->binPos);
391
392 for (int b = 0; b < fn->bbCount; ++b) {
393 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
394 emit->emitInstruction(i);
395 info->bin.instructions++;
396 if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
397 (isFloatType(i->sType) || isFloatType(i->dType)))
398 info->io.fp64 = true;
399 }
400 }
401 }
402 info->bin.relocData = emit->getRelocInfo();
403 info->bin.fixupData = emit->getFixupInfo();
404
405 emitSymbolTable(info);
406
407 // the nvc0 driver will print the binary iself together with the header
408 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
409 emit->printBinary();
410
411 delete emit;
412 return true;
413 }
414
415 #define RELOC_ALLOC_INCREMENT 8
416
417 bool
418 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
419 int s)
420 {
421 unsigned int n = relocInfo ? relocInfo->count : 0;
422
423 if (!(n % RELOC_ALLOC_INCREMENT)) {
424 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
425 relocInfo = reinterpret_cast<RelocInfo *>(
426 REALLOC(relocInfo, n ? size : 0,
427 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
428 if (!relocInfo)
429 return false;
430 if (n == 0)
431 memset(relocInfo, 0, sizeof(RelocInfo));
432 }
433 ++relocInfo->count;
434
435 relocInfo->entry[n].data = data;
436 relocInfo->entry[n].mask = m;
437 relocInfo->entry[n].offset = codeSize + w * 4;
438 relocInfo->entry[n].bitPos = s;
439 relocInfo->entry[n].type = ty;
440
441 return true;
442 }
443
444 bool
445 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
446 {
447 unsigned int n = fixupInfo ? fixupInfo->count : 0;
448
449 if (!(n % RELOC_ALLOC_INCREMENT)) {
450 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
451 fixupInfo = reinterpret_cast<FixupInfo *>(
452 REALLOC(fixupInfo, n ? size : 0,
453 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
454 if (!fixupInfo)
455 return false;
456 if (n == 0)
457 memset(fixupInfo, 0, sizeof(FixupInfo));
458 }
459 ++fixupInfo->count;
460
461 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
462
463 return true;
464 }
465
466 void
467 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
468 {
469 uint32_t value = 0;
470
471 switch (type) {
472 case TYPE_CODE: value = info->codePos; break;
473 case TYPE_BUILTIN: value = info->libPos; break;
474 case TYPE_DATA: value = info->dataPos; break;
475 default:
476 assert(0);
477 break;
478 }
479 value += data;
480 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
481
482 binary[offset / 4] &= ~mask;
483 binary[offset / 4] |= value & mask;
484 }
485
486 } // namespace nv50_ir
487
488
489 #include "codegen/nv50_ir_driver.h"
490
491 extern "C" {
492
493 void
494 nv50_ir_relocate_code(void *relocData, uint32_t *code,
495 uint32_t codePos,
496 uint32_t libPos,
497 uint32_t dataPos)
498 {
499 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
500
501 info->codePos = codePos;
502 info->libPos = libPos;
503 info->dataPos = dataPos;
504
505 for (unsigned int i = 0; i < info->count; ++i)
506 info->entry[i].apply(code, info);
507 }
508
509 void
510 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
511 bool force_persample_interp, bool flatshade,
512 uint8_t alphatest)
513 {
514 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
515 fixupData);
516
517 // force_persample_interp: all non-flat -> per-sample
518 // flatshade: all color -> flat
519 // alphatest: PIPE_FUNC_* to use with alphatest
520 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
521 for (unsigned i = 0; i < info->count; ++i)
522 info->entry[i].apply(&info->entry[i], code, data);
523 }
524
525 void
526 nv50_ir_get_target_library(uint32_t chipset,
527 const uint32_t **code, uint32_t *size)
528 {
529 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
530 targ->getBuiltinCode(code, size);
531 nv50_ir::Target::destroy(targ);
532 }
533
534 }