nv50/ir: add preliminary support for SHLADD
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
28 const uint8_t Target::operationSrcNr[] =
29 {
30 0, 0, // NOP, PHI
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
34 1, 1, 1, // ABS, NEG, NOT
35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
36 2, 2, 1, // MAX, MIN, SAT
37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
42 0, 0, 0, // PRERET,CONT,BREAK
43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
45 1, 1, // EMIT, RESTART
46 1, 1, 1, // TEX, TXB, TXL,
47 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
48 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
49 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
50 0, // TEXBAR
51 1, 1, // DFDX, DFDY
52 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
53 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
54 2, 2, // ATOM, BAR
55 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
56 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
57 3, // SHFL
58 1, // VOTE
59 1, // BUFQ
60 0
61 };
62
63 const OpClass Target::operationClass[] =
64 {
65 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
66 OPCLASS_OTHER,
67 OPCLASS_PSEUDO,
68 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
69 // MOV; LOAD; STORE
70 OPCLASS_MOVE,
71 OPCLASS_LOAD,
72 OPCLASS_STORE,
73 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
74 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
75 OPCLASS_ARITH, OPCLASS_ARITH,
76 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
77 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
78 OPCLASS_CONVERT, OPCLASS_CONVERT,
79 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
80 OPCLASS_SHIFT, OPCLASS_SHIFT,
81 // MAX, MIN
82 OPCLASS_COMPARE, OPCLASS_COMPARE,
83 // SAT, CEIL, FLOOR, TRUNC; CVT
84 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
85 OPCLASS_CONVERT,
86 // SET(AND,OR,XOR); SELP, SLCT
87 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
88 OPCLASS_COMPARE, OPCLASS_COMPARE,
89 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
90 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
91 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
92 OPCLASS_SFU, OPCLASS_SFU,
93 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
94 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
95 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
96 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
97 // DISCARD, EXIT
98 OPCLASS_FLOW, OPCLASS_FLOW,
99 // MEMBAR
100 OPCLASS_CONTROL,
101 // VFETCH, PFETCH, AFETCH, EXPORT
102 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
103 // LINTERP, PINTERP
104 OPCLASS_SFU, OPCLASS_SFU,
105 // EMIT, RESTART
106 OPCLASS_CONTROL, OPCLASS_CONTROL,
107 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
108 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
109 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
110 OPCLASS_TEXTURE, OPCLASS_TEXTURE,
111 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
112 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
113 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
114 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
115 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
116 // TEXBAR
117 OPCLASS_OTHER,
118 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
119 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
120 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
121 // POPCNT, INSBF, EXTBF, BFIND; PERMT
122 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
123 OPCLASS_BITFIELD,
124 // ATOM, BAR
125 OPCLASS_ATOMIC, OPCLASS_CONTROL,
126 // VADD, VAVG, VMIN, VMAX
127 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
128 // VSAD, VSET, VSHR, VSHL
129 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
130 // VSEL, CCTL
131 OPCLASS_VECTOR, OPCLASS_CONTROL,
132 // SHFL
133 OPCLASS_OTHER,
134 // VOTE
135 OPCLASS_OTHER,
136 // BUFQ
137 OPCLASS_OTHER,
138 OPCLASS_PSEUDO // LAST
139 };
140
141
142 extern Target *getTargetGM107(unsigned int chipset);
143 extern Target *getTargetNVC0(unsigned int chipset);
144 extern Target *getTargetNV50(unsigned int chipset);
145
146 Target *Target::create(unsigned int chipset)
147 {
148 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
149 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
150 switch (chipset & ~0xf) {
151 case 0x110:
152 case 0x120:
153 case 0x130:
154 return getTargetGM107(chipset);
155 case 0xc0:
156 case 0xd0:
157 case 0xe0:
158 case 0xf0:
159 case 0x100:
160 return getTargetNVC0(chipset);
161 case 0x50:
162 case 0x80:
163 case 0x90:
164 case 0xa0:
165 return getTargetNV50(chipset);
166 default:
167 ERROR("unsupported target: NV%x\n", chipset);
168 return 0;
169 }
170 }
171
172 void Target::destroy(Target *targ)
173 {
174 delete targ;
175 }
176
177 CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
178 {
179 }
180
181 void
182 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
183 {
184 code = reinterpret_cast<uint32_t *>(ptr);
185 codeSize = 0;
186 codeSizeLimit = size;
187 }
188
189 void
190 CodeEmitter::printBinary() const
191 {
192 uint32_t *bin = code - codeSize / 4;
193 INFO("program binary (%u bytes)", codeSize);
194 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
195 if ((pos % 8) == 0)
196 INFO("\n");
197 INFO("%08x ", bin[pos]);
198 }
199 INFO("\n");
200 }
201
/**
 * Number of 56-byte groups needed to hold @size bytes, rounded up
 * (0 bytes -> 0 groups). The caller accounts 8 extra bytes per group.
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bytesPerBundle = 56;

   return (size + (bytesPerBundle - 1)) / bytesPerBundle;
}
206
207 void
208 CodeEmitter::prepareEmission(Program *prog)
209 {
210 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
211 !fi.end(); fi.next()) {
212 Function *func = reinterpret_cast<Function *>(fi.get());
213 func->binPos = prog->binSize;
214 prepareEmission(func);
215
216 // adjust sizes & positions for schedulding info:
217 if (prog->getTarget()->hasSWSched) {
218 uint32_t adjPos = func->binPos;
219 BasicBlock *bb = NULL;
220 for (int i = 0; i < func->bbCount; ++i) {
221 bb = func->bbArray[i];
222 int32_t adjSize = bb->binSize;
223 if (adjPos % 64) {
224 adjSize -= 64 - adjPos % 64;
225 if (adjSize < 0)
226 adjSize = 0;
227 }
228 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
229 bb->binPos = adjPos;
230 bb->binSize = adjSize;
231 adjPos += adjSize;
232 }
233 if (bb)
234 func->binSize = adjPos - func->binPos;
235 }
236
237 prog->binSize += func->binSize;
238 }
239 }
240
241 void
242 CodeEmitter::prepareEmission(Function *func)
243 {
244 func->bbCount = 0;
245 func->bbArray = new BasicBlock * [func->cfg.getSize()];
246
247 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
248
249 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
250 prepareEmission(BasicBlock::get(*it));
251 }
252
253 void
254 CodeEmitter::prepareEmission(BasicBlock *bb)
255 {
256 Instruction *i, *next;
257 Function *func = bb->getFunction();
258 int j;
259 unsigned int nShort;
260
261 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
262
263 for (; j >= 0; --j) {
264 BasicBlock *in = func->bbArray[j];
265 Instruction *exit = in->getExit();
266
267 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
268 in->binSize -= 8;
269 func->binSize -= 8;
270
271 for (++j; j < func->bbCount; ++j)
272 func->bbArray[j]->binPos -= 8;
273
274 in->remove(exit);
275 }
276 bb->binPos = in->binPos + in->binSize;
277 if (in->binSize) // no more no-op branches to bb
278 break;
279 }
280 func->bbArray[func->bbCount++] = bb;
281
282 if (!bb->getExit())
283 return;
284
285 // determine encoding size, try to group short instructions
286 nShort = 0;
287 for (i = bb->getEntry(); i; i = next) {
288 next = i->next;
289
290 if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
291 bb->remove(i);
292 continue;
293 }
294
295 i->encSize = getMinEncodingSize(i);
296 if (next && i->encSize < 8)
297 ++nShort;
298 else
299 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
300 if (i->isCommutationLegal(i->next)) {
301 bb->permuteAdjacent(i, next);
302 next->encSize = 4;
303 next = i;
304 i = i->prev;
305 ++nShort;
306 } else
307 if (i->isCommutationLegal(i->prev) && next->next) {
308 bb->permuteAdjacent(i->prev, i);
309 next->encSize = 4;
310 next = next->next;
311 bb->binSize += 4;
312 ++nShort;
313 } else {
314 i->encSize = 8;
315 i->prev->encSize = 8;
316 bb->binSize += 4;
317 nShort = 0;
318 }
319 } else {
320 i->encSize = 8;
321 if (nShort & 1) {
322 i->prev->encSize = 8;
323 bb->binSize += 4;
324 }
325 nShort = 0;
326 }
327 bb->binSize += i->encSize;
328 }
329
330 if (bb->getExit()->encSize == 4) {
331 assert(nShort);
332 bb->getExit()->encSize = 8;
333 bb->binSize += 4;
334
335 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
336 bb->binSize += 8;
337 bb->getExit()->prev->encSize = 8;
338 }
339 }
340 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
341
342 func->binSize += bb->binSize;
343 }
344
345 void
346 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
347 {
348 unsigned int n = 0, nMax = allFuncs.getSize();
349
350 info->bin.syms =
351 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
352
353 for (ArrayList::Iterator fi = allFuncs.iterator();
354 !fi.end();
355 fi.next(), ++n) {
356 Function *f = (Function *)fi.get();
357 assert(n < nMax);
358
359 info->bin.syms[n].label = f->getLabel();
360 info->bin.syms[n].offset = f->binPos;
361 }
362
363 info->bin.numSyms = n;
364 }
365
366 bool
367 Program::emitBinary(struct nv50_ir_prog_info *info)
368 {
369 CodeEmitter *emit = target->getCodeEmitter(progType);
370
371 emit->prepareEmission(this);
372
373 if (dbgFlags & NV50_IR_DEBUG_BASIC)
374 this->print();
375
376 if (!binSize) {
377 code = NULL;
378 return false;
379 }
380 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
381 if (!code)
382 return false;
383 emit->setCodeLocation(code, binSize);
384 info->bin.instructions = 0;
385
386 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
387 Function *fn = reinterpret_cast<Function *>(fi.get());
388
389 assert(emit->getCodeSize() == fn->binPos);
390
391 for (int b = 0; b < fn->bbCount; ++b) {
392 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
393 emit->emitInstruction(i);
394 info->bin.instructions++;
395 if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
396 info->io.fp64 = true;
397 }
398 }
399 }
400 info->bin.relocData = emit->getRelocInfo();
401 info->bin.fixupData = emit->getFixupInfo();
402
403 emitSymbolTable(info);
404
405 // the nvc0 driver will print the binary iself together with the header
406 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
407 emit->printBinary();
408
409 delete emit;
410 return true;
411 }
412
413 #define RELOC_ALLOC_INCREMENT 8
414
415 bool
416 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
417 int s)
418 {
419 unsigned int n = relocInfo ? relocInfo->count : 0;
420
421 if (!(n % RELOC_ALLOC_INCREMENT)) {
422 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
423 relocInfo = reinterpret_cast<RelocInfo *>(
424 REALLOC(relocInfo, n ? size : 0,
425 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
426 if (!relocInfo)
427 return false;
428 if (n == 0)
429 memset(relocInfo, 0, sizeof(RelocInfo));
430 }
431 ++relocInfo->count;
432
433 relocInfo->entry[n].data = data;
434 relocInfo->entry[n].mask = m;
435 relocInfo->entry[n].offset = codeSize + w * 4;
436 relocInfo->entry[n].bitPos = s;
437 relocInfo->entry[n].type = ty;
438
439 return true;
440 }
441
442 bool
443 CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
444 {
445 unsigned int n = fixupInfo ? fixupInfo->count : 0;
446
447 if (!(n % RELOC_ALLOC_INCREMENT)) {
448 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
449 fixupInfo = reinterpret_cast<FixupInfo *>(
450 REALLOC(fixupInfo, n ? size : 0,
451 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
452 if (!fixupInfo)
453 return false;
454 if (n == 0)
455 memset(fixupInfo, 0, sizeof(FixupInfo));
456 }
457 ++fixupInfo->count;
458
459 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
460
461 return true;
462 }
463
464 void
465 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
466 {
467 uint32_t value = 0;
468
469 switch (type) {
470 case TYPE_CODE: value = info->codePos; break;
471 case TYPE_BUILTIN: value = info->libPos; break;
472 case TYPE_DATA: value = info->dataPos; break;
473 default:
474 assert(0);
475 break;
476 }
477 value += data;
478 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
479
480 binary[offset / 4] &= ~mask;
481 binary[offset / 4] |= value & mask;
482 }
483
484 } // namespace nv50_ir
485
486
487 #include "codegen/nv50_ir_driver.h"
488
489 extern "C" {
490
491 void
492 nv50_ir_relocate_code(void *relocData, uint32_t *code,
493 uint32_t codePos,
494 uint32_t libPos,
495 uint32_t dataPos)
496 {
497 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
498
499 info->codePos = codePos;
500 info->libPos = libPos;
501 info->dataPos = dataPos;
502
503 for (unsigned int i = 0; i < info->count; ++i)
504 info->entry[i].apply(code, info);
505 }
506
507 void
508 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
509 bool force_persample_interp, bool flatshade,
510 uint8_t alphatest)
511 {
512 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
513 fixupData);
514
515 // force_persample_interp: all non-flat -> per-sample
516 // flatshade: all color -> flat
517 // alphatest: PIPE_FUNC_* to use with alphatest
518 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
519 for (unsigned i = 0; i < info->count; ++i)
520 info->entry[i].apply(&info->entry[i], code, data);
521 }
522
523 void
524 nv50_ir_get_target_library(uint32_t chipset,
525 const uint32_t **code, uint32_t *size)
526 {
527 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
528 targ->getBuiltinCode(code, size);
529 nv50_ir::Target::destroy(targ);
530 }
531
532 }