nv50,nvc0: add support for cube map arrays
[mesa.git] / src / gallium / drivers / nv50 / codegen / nv50_ir_target.cpp
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_target.h"
25
26 namespace nv50_ir {
27
28 const uint8_t Target::operationSrcNr[OP_LAST + 1] =
29 {
30 0, 0, // NOP, PHI
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34 1, 1, 1, // ABS, NEG, NOT
35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
36 2, 2, 1, // MAX, MIN, SAT
37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
42 0, 0, 0, // PRERET,CONT,BREAK
43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
45 1, 1, // EMIT, RESTART
46 1, 1, 1, // TEX, TXB, TXL,
47 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
48 1, 2, // SULD, SUST
49 1, 1, // DFDX, DFDY
50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, TEXPREP, QUADOP, QUADON, QUADPOP
51 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
52 0
53 };
54
55 const OpClass Target::operationClass[OP_LAST + 1] =
56 {
57 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
58 OPCLASS_OTHER,
59 OPCLASS_PSEUDO,
60 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
61 // MOV; LOAD; STORE
62 OPCLASS_MOVE,
63 OPCLASS_LOAD,
64 OPCLASS_STORE,
65 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
66 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
67 OPCLASS_ARITH, OPCLASS_ARITH,
68 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
69 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
70 OPCLASS_CONVERT, OPCLASS_CONVERT,
71 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
72 OPCLASS_SHIFT, OPCLASS_SHIFT,
73 // MAX, MIN
74 OPCLASS_COMPARE, OPCLASS_COMPARE,
75 // SAT, CEIL, FLOOR, TRUNC; CVT
76 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
77 OPCLASS_CONVERT,
78 // SET(AND,OR,XOR); SELP, SLCT
79 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
80 OPCLASS_COMPARE, OPCLASS_COMPARE,
81 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
82 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
83 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
84 OPCLASS_SFU, OPCLASS_SFU,
85 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
86 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
87 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
88 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
89 // DISCARD, EXIT
90 OPCLASS_FLOW, OPCLASS_FLOW,
91 // MEMBAR
92 OPCLASS_OTHER,
93 // VFETCH, PFETCH, EXPORT
94 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
95 // LINTERP, PINTERP
96 OPCLASS_SFU, OPCLASS_SFU,
97 // EMIT, RESTART
98 OPCLASS_OTHER, OPCLASS_OTHER,
99 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
100 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
101 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
102 // SULD, SUST
103 OPCLASS_SURFACE, OPCLASS_SURFACE,
104 // DFDX, DFDY, RDSV, WRSV; TEXPREP, QUADOP, QUADON, QUADPOP
105 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
106 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
107 // POPCNT, INSBF, EXTBF
108 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
109 // TEXBAR
110 OPCLASS_OTHER,
111 OPCLASS_PSEUDO // LAST
112 };
113
114
115 extern Target *getTargetNVC0(unsigned int chipset);
116 extern Target *getTargetNV50(unsigned int chipset);
117
118 Target *Target::create(unsigned int chipset)
119 {
120 switch (chipset & 0xf0) {
121 case 0xc0:
122 case 0xd0:
123 case 0xe0:
124 case NVISA_GK110_CHIPSET:
125 return getTargetNVC0(chipset);
126 case 0x50:
127 case 0x80:
128 case 0x90:
129 case 0xa0:
130 return getTargetNV50(chipset);
131 default:
132 ERROR("unsupported target: NV%x\n", chipset);
133 return 0;
134 }
135 }
136
137 void Target::destroy(Target *targ)
138 {
139 delete targ;
140 }
141
142 CodeEmitter::CodeEmitter(const Target *target) : targ(target)
143 {
144 }
145
146 void
147 CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
148 {
149 code = reinterpret_cast<uint32_t *>(ptr);
150 codeSize = 0;
151 codeSizeLimit = size;
152 }
153
154 void
155 CodeEmitter::printBinary() const
156 {
157 uint32_t *bin = code - codeSize / 4;
158 INFO("program binary (%u bytes)", codeSize);
159 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
160 if ((pos % 8) == 0)
161 INFO("\n");
162 INFO("%08x ", bin[pos]);
163 }
164 INFO("\n");
165 }
166
// Number of 56-byte units needed to hold @size bytes, rounded up.
// The addition is done in uint32_t, so it wraps for sizes near UINT32_MAX.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bundleBytes = 56;
   const uint32_t roundedUp = size + (bundleBytes - 1);

   return roundedUp / bundleBytes;
}
171
172 void
173 CodeEmitter::prepareEmission(Program *prog)
174 {
175 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
176 !fi.end(); fi.next()) {
177 Function *func = reinterpret_cast<Function *>(fi.get());
178 func->binPos = prog->binSize;
179 prepareEmission(func);
180
181 // adjust sizes & positions for schedulding info:
182 if (prog->getTarget()->hasSWSched) {
183 BasicBlock *bb = NULL;
184 for (int i = 0; i < func->bbCount; ++i) {
185 bb = func->bbArray[i];
186 const uint32_t oldPos = bb->binPos;
187 const uint32_t oldEnd = bb->binPos + bb->binSize;
188 uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8;
189 uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8;
190 bb->binPos = adjPos;
191 bb->binSize = adjEnd - adjPos;
192 }
193 if (bb)
194 func->binSize = bb->binPos + bb->binSize;
195 }
196
197 prog->binSize += func->binSize;
198 }
199 }
200
201 void
202 CodeEmitter::prepareEmission(Function *func)
203 {
204 func->bbCount = 0;
205 func->bbArray = new BasicBlock * [func->cfg.getSize()];
206
207 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
208
209 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
210 prepareEmission(BasicBlock::get(*it));
211 }
212
213 void
214 CodeEmitter::prepareEmission(BasicBlock *bb)
215 {
216 Instruction *i, *next;
217 Function *func = bb->getFunction();
218 int j;
219 unsigned int nShort;
220
221 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
222
223 for (; j >= 0; --j) {
224 BasicBlock *in = func->bbArray[j];
225 Instruction *exit = in->getExit();
226
227 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
228 in->binSize -= 8;
229 func->binSize -= 8;
230
231 for (++j; j < func->bbCount; ++j)
232 func->bbArray[j]->binPos -= 8;
233
234 in->remove(exit);
235 }
236 bb->binPos = in->binPos + in->binSize;
237 if (in->binSize) // no more no-op branches to bb
238 break;
239 }
240 func->bbArray[func->bbCount++] = bb;
241
242 if (!bb->getExit())
243 return;
244
245 // determine encoding size, try to group short instructions
246 nShort = 0;
247 for (i = bb->getEntry(); i; i = next) {
248 next = i->next;
249
250 i->encSize = getMinEncodingSize(i);
251 if (next && i->encSize < 8)
252 ++nShort;
253 else
254 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
255 if (i->isCommutationLegal(i->next)) {
256 bb->permuteAdjacent(i, next);
257 next->encSize = 4;
258 next = i;
259 i = i->prev;
260 ++nShort;
261 } else
262 if (i->isCommutationLegal(i->prev) && next->next) {
263 bb->permuteAdjacent(i->prev, i);
264 next->encSize = 4;
265 next = next->next;
266 bb->binSize += 4;
267 ++nShort;
268 } else {
269 i->encSize = 8;
270 i->prev->encSize = 8;
271 bb->binSize += 4;
272 nShort = 0;
273 }
274 } else {
275 i->encSize = 8;
276 if (nShort & 1) {
277 i->prev->encSize = 8;
278 bb->binSize += 4;
279 }
280 nShort = 0;
281 }
282 bb->binSize += i->encSize;
283 }
284
285 if (bb->getExit()->encSize == 4) {
286 assert(nShort);
287 bb->getExit()->encSize = 8;
288 bb->binSize += 4;
289
290 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
291 bb->binSize += 8;
292 bb->getExit()->prev->encSize = 8;
293 }
294 }
295 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
296
297 func->binSize += bb->binSize;
298 }
299
300 void
301 Program::emitSymbolTable(struct nv50_ir_prog_info *info)
302 {
303 unsigned int n = 0, nMax = allFuncs.getSize();
304
305 info->bin.syms =
306 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
307
308 for (ArrayList::Iterator fi = allFuncs.iterator();
309 !fi.end();
310 fi.next(), ++n) {
311 Function *f = (Function *)fi.get();
312 assert(n < nMax);
313
314 info->bin.syms[n].label = f->getLabel();
315 info->bin.syms[n].offset = f->binPos;
316 }
317
318 info->bin.numSyms = n;
319 }
320
321 bool
322 Program::emitBinary(struct nv50_ir_prog_info *info)
323 {
324 CodeEmitter *emit = target->getCodeEmitter(progType);
325
326 emit->prepareEmission(this);
327
328 if (dbgFlags & NV50_IR_DEBUG_BASIC)
329 this->print();
330
331 if (!binSize) {
332 code = NULL;
333 return false;
334 }
335 code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
336 if (!code)
337 return false;
338 emit->setCodeLocation(code, binSize);
339
340 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
341 Function *fn = reinterpret_cast<Function *>(fi.get());
342
343 assert(emit->getCodeSize() == fn->binPos);
344
345 for (int b = 0; b < fn->bbCount; ++b)
346 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
347 emit->emitInstruction(i);
348 }
349 info->bin.relocData = emit->getRelocInfo();
350
351 emitSymbolTable(info);
352
353 // the nvc0 driver will print the binary iself together with the header
354 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
355 emit->printBinary();
356
357 delete emit;
358 return true;
359 }
360
361 #define RELOC_ALLOC_INCREMENT 8
362
363 bool
364 CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
365 int s)
366 {
367 unsigned int n = relocInfo ? relocInfo->count : 0;
368
369 if (!(n % RELOC_ALLOC_INCREMENT)) {
370 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
371 relocInfo = reinterpret_cast<RelocInfo *>(
372 REALLOC(relocInfo, n ? size : 0,
373 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
374 if (!relocInfo)
375 return false;
376 if (n == 0)
377 memset(relocInfo, 0, sizeof(RelocInfo));
378 }
379 ++relocInfo->count;
380
381 relocInfo->entry[n].data = data;
382 relocInfo->entry[n].mask = m;
383 relocInfo->entry[n].offset = codeSize + w * 4;
384 relocInfo->entry[n].bitPos = s;
385 relocInfo->entry[n].type = ty;
386
387 return true;
388 }
389
390 void
391 RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
392 {
393 uint32_t value = 0;
394
395 switch (type) {
396 case TYPE_CODE: value = info->codePos; break;
397 case TYPE_BUILTIN: value = info->libPos; break;
398 case TYPE_DATA: value = info->dataPos; break;
399 default:
400 assert(0);
401 break;
402 }
403 value += data;
404 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
405
406 binary[offset / 4] &= ~mask;
407 binary[offset / 4] |= value & mask;
408 }
409
410 } // namespace nv50_ir
411
412
413 #include "nv50/codegen/nv50_ir_driver.h"
414
415 extern "C" {
416
417 void
418 nv50_ir_relocate_code(void *relocData, uint32_t *code,
419 uint32_t codePos,
420 uint32_t libPos,
421 uint32_t dataPos)
422 {
423 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
424
425 info->codePos = codePos;
426 info->libPos = libPos;
427 info->dataPos = dataPos;
428
429 for (unsigned int i = 0; i < info->count; ++i)
430 info->entry[i].apply(code, info);
431 }
432
433 void
434 nv50_ir_get_target_library(uint32_t chipset,
435 const uint32_t **code, uint32_t *size)
436 {
437 nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
438 targ->getBuiltinCode(code, size);
439 nv50_ir::Target::destroy(targ);
440 }
441
442 }