nvc0: add support for PIPE_CAP_SAMPLE_SHADING
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
// Per-opcode table with one entry for every operation up to and including
// OP_LAST (the size is enforced by a STATIC_ASSERT in Target::create()).
// Going by the name, each value is the number of source operands of the
// opcode; the trailing comments list the opcodes each row covers, in order.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET,CONT,BREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 3,             // SUBFM, SUCLAMP, SUEAU, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 3,             // POPCNT, INSBF, EXTBF, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   0                       // LAST (final slot, index OP_LAST)
};
59
// Per-opcode table assigning an OpClass to every operation up to and
// including OP_LAST (the size is enforced by a STATIC_ASSERT in
// Target::create()). The comment above each group names the opcodes the
// following entries belong to, in order.
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET(AND,OR,XOR); SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART
   OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): the third entry (SUSTB) is OPCLASS_ATOMIC while every
   // other surface op in this group is OPCLASS_SURFACE - confirm that this
   // is intentional before relying on it.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   OPCLASS_PSEUDO // LAST
};
130
131
132 extern Target *getTargetNVC0(unsigned int chipset);
133 extern Target *getTargetNV50(unsigned int chipset);
134
135 Target *Target::create(unsigned int chipset)
136 {
137 STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
138 STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
139 switch (chipset & ~0xf) {
140 case 0xc0:
141 case 0xd0:
142 case 0xe0:
143 case 0xf0:
144 case 0x100:
145 return getTargetNVC0(chipset);
146 case 0x50:
147 case 0x80:
148 case 0x90:
149 case 0xa0:
150 return getTargetNV50(chipset);
151 default:
152 ERROR("unsupported target: NV%x\n", chipset);
153 return 0;
154 }
155 }
156
157 void Target::destroy(Target *targ)
158 {
159 delete targ;
160 }
161
162 CodeEmitter::CodeEmitter(const Target *target) : targ(target)
163 {
164 }
165
166 void
167 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
168 {
169 code = reinterpret_cast<uint32_t *>(ptr);
170 codeSize = 0;
171 codeSizeLimit = size;
172 }
173
174 void
175 CodeEmitter::printBinary() const
176 {
177 uint32_t *bin = code - codeSize / 4;
178 INFO("program binary (%u bytes)", codeSize);
179 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
180 if ((pos % 8) == 0)
181 INFO("\n");
182 INFO("%08x ", bin[pos]);
183 }
184 INFO("\n");
185 }
186
// Number of 56-byte units needed to hold `size` bytes, rounded up
// (0 bytes need 0 units).
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bytesPerBundle = 56;

   return (size + (bytesPerBundle - 1)) / bytesPerBundle;
}
191
192 void
193 CodeEmitter::prepareEmission(Program *prog)
194 {
195 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
196 !fi.end(); fi.next()) {
197 Function *func = reinterpret_cast<Function *>(fi.get());
198 func->binPos = prog->binSize;
199 prepareEmission(func);
200
201 // adjust sizes & positions for schedulding info:
202 if (prog->getTarget()->hasSWSched) {
203 uint32_t adjPos = func->binPos;
204 BasicBlock *bb = NULL;
205 for (int i = 0; i < func->bbCount; ++i) {
206 bb = func->bbArray[i];
207 int32_t adjSize = bb->binSize;
208 if (adjPos % 64) {
209 adjSize -= 64 - adjPos % 64;
210 if (adjSize < 0)
211 adjSize = 0;
212 }
213 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
214 bb->binPos = adjPos;
215 bb->binSize = adjSize;
216 adjPos += adjSize;
217 }
218 if (bb)
219 func->binSize = adjPos - func->binPos;
220 }
221
222 prog->binSize += func->binSize;
223 }
224 }
225
226 void
227 CodeEmitter::prepareEmission(Function *func)
228 {
229 func->bbCount = 0;
230 func->bbArray = new BasicBlock * [func->cfg.getSize()];
231
232 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
233
234 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
235 prepareEmission(BasicBlock::get(*it));
236 }
237
/* Place one basic block behind the blocks already laid out for its function
 * and determine the encoding size of each of its instructions.
 *
 * Step 1: walk backwards over the blocks placed so far; an OP_BRA at the end
 *         of such a block that targets `bb` is removed (it would only jump
 *         to the block that follows it) and the sizes/positions recorded so
 *         far are corrected by the 8 bytes that were accounted for it.
 * Step 2: append `bb` to func->bbArray.
 * Step 3: assign encSize to every instruction, trying to reorder adjacent
 *         instructions so that 4-byte encodings end up next to each other,
 *         and accumulate the block size into bb->binSize / func->binSize.
 *
 * NOTE(review): the pairing logic suggests that 4-byte encodings are only
 * usable in pairs on this hardware - confirm against the target emitters.
 */
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort; // length of the current run of short instructions

   // skip trailing blocks that occupy no space (binSize == 0)
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         // the branch goes to the block placed right behind: drop it and
         // take its 8 bytes out of the block and function sizes
         in->binSize -= 8;
         func->binSize -= 8;

         // all blocks placed after `in` move up by 8 bytes; this leaves
         // j == func->bbCount, so the outer loop restarts from the last block
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // a block without an exit instruction needs no size computation
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next; // fetched up front: i may be removed or moved below

      // drop memory barriers the target cannot encode
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
         bb->remove(i);
         continue;
      }

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // i is not short, an odd number of short instructions precedes it
         // and the following instruction could be short: try to bring the
         // short ones together by swapping i with a neighbour
         if (i->isCommutationLegal(i->next)) {
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            // revisit i in the next iteration; the swapped-in instruction
            // is accounted for by the size update at the end of this one
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no legal swap: widen the unpaired short predecessor as well
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         i->encSize = 8;
         if (nShort & 1) {
            // the preceding short instruction has no partner: widen it
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // the exit instruction always gets the 8-byte encoding
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      // NOTE(review): this adds 8 bytes while encSize only grows from 4 to
      // 8 - confirm this accounting is intended.
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
329
330 void
331 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
332 {
333 unsigned int n = 0, nMax = allFuncs.getSize();
334
335 info->bin.syms =
336 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
337
338 for (ArrayList::Iterator fi = allFuncs.iterator();
339 !fi.end();
340 fi.next(), ++n) {
341 Function *f = (Function *)fi.get();
342 assert(n < nMax);
343
344 info->bin.syms[n].label = f->getLabel();
345 info->bin.syms[n].offset = f->binPos;
346 }
347
348 info->bin.numSyms = n;
349 }
350
351 bool
352 Program::emitBinary(struct nv50_ir_prog_info *info)
353 {
354 CodeEmitter *emit = target->getCodeEmitter(progType);
355
356 emit->prepareEmission(this);
357
358 if (dbgFlags & NV50_IR_DEBUG_BASIC)
359 this->print();
360
361 if (!binSize) {
362 code = NULL;
363 return false;
364 }
365 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
366 if (!code)
367 return false;
368 emit->setCodeLocation(code, binSize);
369
370 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
371 Function *fn = reinterpret_cast<Function *>(fi.get());
372
373 assert(emit->getCodeSize() == fn->binPos);
374
375 for (int b = 0; b < fn->bbCount; ++b)
376 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
377 emit->emitInstruction(i);
378 }
379 info->bin.relocData = emit->getRelocInfo();
380
381 emitSymbolTable(info);
382
383 // the nvc0 driver will print the binary iself together with the header
384 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
385 emit->printBinary();
386
387 delete emit;
388 return true;
389 }
390
391 #define RELOC_ALLOC_INCREMENT 8
392
393 bool
394 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
395 int s)
396 {
397 unsigned int n = relocInfo ? relocInfo->count : 0;
398
399 if (!(n % RELOC_ALLOC_INCREMENT)) {
400 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
401 relocInfo = reinterpret_cast<RelocInfo *>(
402 REALLOC(relocInfo, n ? size : 0,
403 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
404 if (!relocInfo)
405 return false;
406 if (n == 0)
407 memset(relocInfo, 0, sizeof(RelocInfo));
408 }
409 ++relocInfo->count;
410
411 relocInfo->entry[n].data = data;
412 relocInfo->entry[n].mask = m;
413 relocInfo->entry[n].offset = codeSize + w * 4;
414 relocInfo->entry[n].bitPos = s;
415 relocInfo->entry[n].type = ty;
416
417 return true;
418 }
419
420 void
421 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
422 {
423 uint32_t value = 0;
424
425 switch (type) {
426 case TYPE_CODE: value = info->codePos; break;
427 case TYPE_BUILTIN: value = info->libPos; break;
428 case TYPE_DATA: value = info->dataPos; break;
429 default:
430 assert(0);
431 break;
432 }
433 value += data;
434 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
435
436 binary[offset / 4] &= ~mask;
437 binary[offset / 4] |= value & mask;
438 }
439
440 } // namespace nv50_ir
441
442
443 #include "codegen/nv50_ir_driver.h"
444
445 extern "C" {
446
447 void
448 nv50_ir_relocate_code(void *relocData, uint32_t *code,
449 uint32_t codePos,
450 uint32_t libPos,
451 uint32_t dataPos)
452 {
453 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
454
455 info->codePos = codePos;
456 info->libPos = libPos;
457 info->dataPos = dataPos;
458
459 for (unsigned int i = 0; i < info->count; ++i)
460 info->entry[i].apply(code, info);
461 }
462
463 void
464 nv50_ir_get_target_library(uint32_t chipset,
465 const uint32_t **code, uint32_t *size)
466 {
467 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
468 targ->getBuiltinCode(code, size);
469 nv50_ir::Target::destroy(targ);
470 }
471
472 }