Merge remote-tracking branch 'origin/master' into vulkan
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
28 const uint8_t Target::operationSrcNr[] =
29 {
30 0, 0, // NOP, PHI
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34 1, 1, 1, // ABS, NEG, NOT
35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
36 2, 2, 1, // MAX, MIN, SAT
37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
42 0, 0, 0, // PRERET,CONT,BREAK
43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
45 1, 1, // EMIT, RESTART
46 1, 1, 1, // TEX, TXB, TXL,
47 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
48 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
49 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
50 0, // TEXBAR
51 1, 1, // DFDX, DFDY
52 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
53 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
54 2, 2, // ATOM, BAR
55 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
56 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
57 3, // SHFL
58 0
59 };
60
61 const OpClass Target::operationClass[] =
62 {
63 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
64 OPCLASS_OTHER,
65 OPCLASS_PSEUDO,
66 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
67 // MOV; LOAD; STORE
68 OPCLASS_MOVE,
69 OPCLASS_LOAD,
70 OPCLASS_STORE,
71 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
72 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
73 OPCLASS_ARITH, OPCLASS_ARITH,
74 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
75 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
76 OPCLASS_CONVERT, OPCLASS_CONVERT,
77 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
78 OPCLASS_SHIFT, OPCLASS_SHIFT,
79 // MAX, MIN
80 OPCLASS_COMPARE, OPCLASS_COMPARE,
81 // SAT, CEIL, FLOOR, TRUNC; CVT
82 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
83 OPCLASS_CONVERT,
84 // SET(AND,OR,XOR); SELP, SLCT
85 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
86 OPCLASS_COMPARE, OPCLASS_COMPARE,
87 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
88 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
89 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
90 OPCLASS_SFU, OPCLASS_SFU,
91 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
92 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
93 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
94 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
95 // DISCARD, EXIT
96 OPCLASS_FLOW, OPCLASS_FLOW,
97 // MEMBAR
98 OPCLASS_CONTROL,
99 // VFETCH, PFETCH, AFETCH, EXPORT
100 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
101 // LINTERP, PINTERP
102 OPCLASS_SFU, OPCLASS_SFU,
103 // EMIT, RESTART
104 OPCLASS_CONTROL, OPCLASS_CONTROL,
105 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
106 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
107 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
108 OPCLASS_TEXTURE, OPCLASS_TEXTURE,
109 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
110 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
111 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
112 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
113 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
114 // TEXBAR
115 OPCLASS_OTHER,
116 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
117 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
118 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
119 // POPCNT, INSBF, EXTBF, BFIND; PERMT
120 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
121 OPCLASS_BITFIELD,
122 // ATOM, BAR
123 OPCLASS_ATOMIC, OPCLASS_CONTROL,
124 // VADD, VAVG, VMIN, VMAX
125 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
126 // VSAD, VSET, VSHR, VSHL
127 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
128 // VSEL, CCTL
129 OPCLASS_VECTOR, OPCLASS_CONTROL,
130 // SHFL
131 OPCLASS_OTHER,
132 OPCLASS_PSEUDO // LAST
133 };
134
135
136 extern Target *getTargetGM107(unsigned int chipset);
137 extern Target *getTargetNVC0(unsigned int chipset);
138 extern Target *getTargetNV50(unsigned int chipset);
139
140 Target *Target::create(unsigned int chipset)
141 {
142 STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
143 STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
144 switch (chipset & ~0xf) {
145 case 0x110:
146 case 0x120:
147 return getTargetGM107(chipset);
148 case 0xc0:
149 case 0xd0:
150 case 0xe0:
151 case 0xf0:
152 case 0x100:
153 return getTargetNVC0(chipset);
154 case 0x50:
155 case 0x80:
156 case 0x90:
157 case 0xa0:
158 return getTargetNV50(chipset);
159 default:
160 ERROR("unsupported target: NV%x\n", chipset);
161 return 0;
162 }
163 }
164
165 void Target::destroy(Target *targ)
166 {
167 delete targ;
168 }
169
170 CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
171 {
172 }
173
174 void
175 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
176 {
177 code = reinterpret_cast<uint32_t *>(ptr);
178 codeSize = 0;
179 codeSizeLimit = size;
180 }
181
182 void
183 CodeEmitter::printBinary() const
184 {
185 uint32_t *bin = code - codeSize / 4;
186 INFO("program binary (%u bytes)", codeSize);
187 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
188 if ((pos % 8) == 0)
189 INFO("\n");
190 INFO("%08x ", bin[pos]);
191 }
192 INFO("\n");
193 }
194
// Number of 56-unit bundles needed to hold `size`, i.e. size / 56 rounded
// up.  The addition is plain uint32_t arithmetic and wraps for sizes within
// 55 of UINT32_MAX.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bundlePayload = 56;
   const uint32_t padded = size + (bundlePayload - 1);

   return padded / bundlePayload;
}
199
200 void
201 CodeEmitter::prepareEmission(Program *prog)
202 {
203 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
204 !fi.end(); fi.next()) {
205 Function *func = reinterpret_cast<Function *>(fi.get());
206 func->binPos = prog->binSize;
207 prepareEmission(func);
208
209 // adjust sizes & positions for schedulding info:
210 if (prog->getTarget()->hasSWSched) {
211 uint32_t adjPos = func->binPos;
212 BasicBlock *bb = NULL;
213 for (int i = 0; i < func->bbCount; ++i) {
214 bb = func->bbArray[i];
215 int32_t adjSize = bb->binSize;
216 if (adjPos % 64) {
217 adjSize -= 64 - adjPos % 64;
218 if (adjSize < 0)
219 adjSize = 0;
220 }
221 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
222 bb->binPos = adjPos;
223 bb->binSize = adjSize;
224 adjPos += adjSize;
225 }
226 if (bb)
227 func->binSize = adjPos - func->binPos;
228 }
229
230 prog->binSize += func->binSize;
231 }
232 }
233
234 void
235 CodeEmitter::prepareEmission(Function *func)
236 {
237 func->bbCount = 0;
238 func->bbArray = new BasicBlock * [func->cfg.getSize()];
239
240 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
241
242 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
243 prepareEmission(BasicBlock::get(*it));
244 }
245
246 void
247 CodeEmitter::prepareEmission(BasicBlock *bb)
248 {
249 Instruction *i, *next;
250 Function *func = bb->getFunction();
251 int j;
252 unsigned int nShort;
253
254 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
255
256 for (; j >= 0; --j) {
257 BasicBlock *in = func->bbArray[j];
258 Instruction *exit = in->getExit();
259
260 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
261 in->binSize -= 8;
262 func->binSize -= 8;
263
264 for (++j; j < func->bbCount; ++j)
265 func->bbArray[j]->binPos -= 8;
266
267 in->remove(exit);
268 }
269 bb->binPos = in->binPos + in->binSize;
270 if (in->binSize) // no more no-op branches to bb
271 break;
272 }
273 func->bbArray[func->bbCount++] = bb;
274
275 if (!bb->getExit())
276 return;
277
278 // determine encoding size, try to group short instructions
279 nShort = 0;
280 for (i = bb->getEntry(); i; i = next) {
281 next = i->next;
282
283 if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
284 bb->remove(i);
285 continue;
286 }
287
288 i->encSize = getMinEncodingSize(i);
289 if (next && i->encSize < 8)
290 ++nShort;
291 else
292 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
293 if (i->isCommutationLegal(i->next)) {
294 bb->permuteAdjacent(i, next);
295 next->encSize = 4;
296 next = i;
297 i = i->prev;
298 ++nShort;
299 } else
300 if (i->isCommutationLegal(i->prev) && next->next) {
301 bb->permuteAdjacent(i->prev, i);
302 next->encSize = 4;
303 next = next->next;
304 bb->binSize += 4;
305 ++nShort;
306 } else {
307 i->encSize = 8;
308 i->prev->encSize = 8;
309 bb->binSize += 4;
310 nShort = 0;
311 }
312 } else {
313 i->encSize = 8;
314 if (nShort & 1) {
315 i->prev->encSize = 8;
316 bb->binSize += 4;
317 }
318 nShort = 0;
319 }
320 bb->binSize += i->encSize;
321 }
322
323 if (bb->getExit()->encSize == 4) {
324 assert(nShort);
325 bb->getExit()->encSize = 8;
326 bb->binSize += 4;
327
328 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
329 bb->binSize += 8;
330 bb->getExit()->prev->encSize = 8;
331 }
332 }
333 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
334
335 func->binSize += bb->binSize;
336 }
337
338 void
339 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
340 {
341 unsigned int n = 0, nMax = allFuncs.getSize();
342
343 info->bin.syms =
344 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
345
346 for (ArrayList::Iterator fi = allFuncs.iterator();
347 !fi.end();
348 fi.next(), ++n) {
349 Function *f = (Function *)fi.get();
350 assert(n < nMax);
351
352 info->bin.syms[n].label = f->getLabel();
353 info->bin.syms[n].offset = f->binPos;
354 }
355
356 info->bin.numSyms = n;
357 }
358
359 bool
360 Program::emitBinary(struct nv50_ir_prog_info *info)
361 {
362 CodeEmitter *emit = target->getCodeEmitter(progType);
363
364 emit->prepareEmission(this);
365
366 if (dbgFlags & NV50_IR_DEBUG_BASIC)
367 this->print();
368
369 if (!binSize) {
370 code = NULL;
371 return false;
372 }
373 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
374 if (!code)
375 return false;
376 emit->setCodeLocation(code, binSize);
377 info->bin.instructions = 0;
378
379 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
380 Function *fn = reinterpret_cast<Function *>(fi.get());
381
382 assert(emit->getCodeSize() == fn->binPos);
383
384 for (int b = 0; b < fn->bbCount; ++b) {
385 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
386 emit->emitInstruction(i);
387 info->bin.instructions++;
388 if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
389 info->io.fp64 = true;
390 }
391 }
392 }
393 info->bin.relocData = emit->getRelocInfo();
394 info->bin.interpData = emit->getInterpInfo();
395
396 emitSymbolTable(info);
397
398 // the nvc0 driver will print the binary iself together with the header
399 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
400 emit->printBinary();
401
402 delete emit;
403 return true;
404 }
405
406 #define RELOC_ALLOC_INCREMENT 8
407
408 bool
409 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
410 int s)
411 {
412 unsigned int n = relocInfo ? relocInfo->count : 0;
413
414 if (!(n % RELOC_ALLOC_INCREMENT)) {
415 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
416 relocInfo = reinterpret_cast<RelocInfo *>(
417 REALLOC(relocInfo, n ? size : 0,
418 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
419 if (!relocInfo)
420 return false;
421 if (n == 0)
422 memset(relocInfo, 0, sizeof(RelocInfo));
423 }
424 ++relocInfo->count;
425
426 relocInfo->entry[n].data = data;
427 relocInfo->entry[n].mask = m;
428 relocInfo->entry[n].offset = codeSize + w * 4;
429 relocInfo->entry[n].bitPos = s;
430 relocInfo->entry[n].type = ty;
431
432 return true;
433 }
434
435 bool
436 CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
437 {
438 unsigned int n = interpInfo ? interpInfo->count : 0;
439
440 if (!(n % RELOC_ALLOC_INCREMENT)) {
441 size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
442 interpInfo = reinterpret_cast<InterpInfo *>(
443 REALLOC(interpInfo, n ? size : 0,
444 size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
445 if (!interpInfo)
446 return false;
447 if (n == 0)
448 memset(interpInfo, 0, sizeof(InterpInfo));
449 }
450 ++interpInfo->count;
451
452 interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
453 interpInfo->apply = apply;
454
455 return true;
456 }
457
458 void
459 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
460 {
461 uint32_t value = 0;
462
463 switch (type) {
464 case TYPE_CODE: value = info->codePos; break;
465 case TYPE_BUILTIN: value = info->libPos; break;
466 case TYPE_DATA: value = info->dataPos; break;
467 default:
468 assert(0);
469 break;
470 }
471 value += data;
472 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
473
474 binary[offset / 4] &= ~mask;
475 binary[offset / 4] |= value & mask;
476 }
477
478 } // namespace nv50_ir
479
480
481 #include "codegen/nv50_ir_driver.h"
482
483 extern "C" {
484
485 void
486 nv50_ir_relocate_code(void *relocData, uint32_t *code,
487 uint32_t codePos,
488 uint32_t libPos,
489 uint32_t dataPos)
490 {
491 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
492
493 info->codePos = codePos;
494 info->libPos = libPos;
495 info->dataPos = dataPos;
496
497 for (unsigned int i = 0; i < info->count; ++i)
498 info->entry[i].apply(code, info);
499 }
500
501 void
502 nv50_ir_change_interp(void *interpData, uint32_t *code,
503 bool force_persample_interp, bool flatshade)
504 {
505 nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>(
506 interpData);
507
508 // force_persample_interp: all non-flat -> per-sample
509 // flatshade: all color -> flat
510 for (unsigned i = 0; i < info->count; ++i)
511 info->apply(&info->entry[i], code, force_persample_interp, flatshade);
512 }
513
514 void
515 nv50_ir_get_target_library(uint32_t chipset,
516 const uint32_t **code, uint32_t *size)
517 {
518 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
519 targ->getBuiltinCode(code, size);
520 nv50_ir::Target::destroy(targ);
521 }
522
523 }