9193a01f189874a7fb384529a4f34fbc9148a452
2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "codegen/nv50_ir.h"
24 #include "codegen/nv50_ir_target.h"
// Per-opcode table; going by its name and values, the number of source
// operands each opcode takes. The trailing comment on every row names the
// opcodes that row covers. Target::create() statically asserts that the
// table holds OP_LAST + 1 entries.
28 const uint8_t Target::operationSrcNr
[] =
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
35 1, 1, 1, // ABS, NEG, NOT
36 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
37 2, 2, 1, // MAX, MIN, SAT
38 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
39 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
40 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
41 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
42 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
43 0, 0, 0, // PRERET,CONT,BREAK
44 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
45 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
46 1, 1, // EMIT, RESTART
47 1, 1, 1, // TEX, TXB, TXL,
48 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
49 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
50 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
53 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
54 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
56 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
57 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
// Per-opcode OpClass table. The comments between groups name the opcodes the
// following entries belong to, in the same opcode order used by
// operationSrcNr. Target::create() statically asserts that the table holds
// OP_LAST + 1 entries.
64 const OpClass
Target::operationClass
[] =
66 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
69 OPCLASS_PSEUDO
, OPCLASS_PSEUDO
, OPCLASS_PSEUDO
, OPCLASS_PSEUDO
,
74 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
75 OPCLASS_ARITH
, OPCLASS_ARITH
, OPCLASS_ARITH
,
76 OPCLASS_ARITH
, OPCLASS_ARITH
,
77 OPCLASS_ARITH
, OPCLASS_ARITH
, OPCLASS_ARITH
, OPCLASS_ARITH
, OPCLASS_ARITH
,
78 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
79 OPCLASS_CONVERT
, OPCLASS_CONVERT
,
80 OPCLASS_LOGIC
, OPCLASS_LOGIC
, OPCLASS_LOGIC
, OPCLASS_LOGIC
,
81 OPCLASS_SHIFT
, OPCLASS_SHIFT
,
// presumably MAX, MIN (they follow SHR in the operationSrcNr opcode order)
83 OPCLASS_COMPARE
, OPCLASS_COMPARE
,
84 // SAT, CEIL, FLOOR, TRUNC; CVT
85 OPCLASS_CONVERT
, OPCLASS_CONVERT
, OPCLASS_CONVERT
, OPCLASS_CONVERT
,
87 // SET(AND,OR,XOR); SELP, SLCT
88 OPCLASS_COMPARE
, OPCLASS_COMPARE
, OPCLASS_COMPARE
, OPCLASS_COMPARE
,
89 OPCLASS_COMPARE
, OPCLASS_COMPARE
,
90 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
91 OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
,
92 OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
, OPCLASS_SFU
,
93 OPCLASS_SFU
, OPCLASS_SFU
,
94 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
95 OPCLASS_FLOW
, OPCLASS_FLOW
, OPCLASS_FLOW
,
96 OPCLASS_FLOW
, OPCLASS_FLOW
, OPCLASS_FLOW
, OPCLASS_FLOW
, OPCLASS_FLOW
,
97 OPCLASS_FLOW
, OPCLASS_FLOW
, OPCLASS_FLOW
,
99 OPCLASS_FLOW
, OPCLASS_FLOW
,
102 // VFETCH, PFETCH, AFETCH, EXPORT
103 OPCLASS_LOAD
, OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_STORE
,
// presumably LINTERP, PINTERP (next in the operationSrcNr opcode order)
105 OPCLASS_SFU
, OPCLASS_SFU
,
// presumably EMIT, RESTART (next in the operationSrcNr opcode order)
107 OPCLASS_CONTROL
, OPCLASS_CONTROL
,
108 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
109 OPCLASS_TEXTURE
, OPCLASS_TEXTURE
, OPCLASS_TEXTURE
, OPCLASS_TEXTURE
,
110 OPCLASS_TEXTURE
, OPCLASS_TEXTURE
, OPCLASS_TEXTURE
, OPCLASS_TEXTURE
,
111 OPCLASS_TEXTURE
, OPCLASS_TEXTURE
,
112 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
// NOTE(review): the third entry below (SUSTB per the list above) is
// OPCLASS_ATOMIC while its sibling surface ops are OPCLASS_SURFACE - confirm
// this is intended.
113 OPCLASS_SURFACE
, OPCLASS_SURFACE
, OPCLASS_ATOMIC
, OPCLASS_SURFACE
,
114 OPCLASS_SURFACE
, OPCLASS_SURFACE
, OPCLASS_SURFACE
,
115 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
116 OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_ARITH
,
119 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
120 OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_OTHER
,
121 OPCLASS_OTHER
, OPCLASS_OTHER
, OPCLASS_CONTROL
, OPCLASS_CONTROL
,
122 // POPCNT, INSBF, EXTBF, BFIND; PERMT
123 OPCLASS_BITFIELD
, OPCLASS_BITFIELD
, OPCLASS_BITFIELD
, OPCLASS_BITFIELD
,
126 OPCLASS_ATOMIC
, OPCLASS_CONTROL
,
127 // VADD, VAVG, VMIN, VMAX
128 OPCLASS_VECTOR
, OPCLASS_VECTOR
, OPCLASS_VECTOR
, OPCLASS_VECTOR
,
129 // VSAD, VSET, VSHR, VSHL
130 OPCLASS_VECTOR
, OPCLASS_VECTOR
, OPCLASS_VECTOR
, OPCLASS_VECTOR
,
132 OPCLASS_VECTOR
, OPCLASS_CONTROL
,
139 OPCLASS_PSEUDO
// LAST
// Per-generation Target factories, defined elsewhere. Target::create() picks
// one of them based on the chipset id.
143 extern Target
*getTargetGM107(unsigned int chipset
);
144 extern Target
*getTargetNVC0(unsigned int chipset
);
145 extern Target
*getTargetNV50(unsigned int chipset
);
// Returns the Target implementation for the given chipset id.
//
// The selection switches on the chipset id with its low four bits cleared and
// forwards the full id to the GM107, NVC0 or NV50 factory. An id that matches
// none of the cases is reported through ERROR.
147 Target
*Target::create(unsigned int chipset
)
// Compile-time check that both per-opcode tables have one entry for every
// opcode up to and including OP_LAST.
149 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr
) == OP_LAST
+ 1);
150 STATIC_ASSERT(ARRAY_SIZE(operationClass
) == OP_LAST
+ 1);
151 switch (chipset
& ~0xf) {
155 return getTargetGM107(chipset
);
161 return getTargetNVC0(chipset
);
166 return getTargetNV50(chipset
);
168 ERROR("unsupported target: NV%x\n", chipset
);
// Counterpart of Target::create(): nv50_ir_get_target_library() calls it on
// the Target it obtained from create() once it is done with it.
173 void Target::destroy(Target
*targ
)
// Remembers the target the emitter works for and starts with no fixup info
// allocated (addInterp() allocates it on first use).
178 CodeEmitter::CodeEmitter(const Target
*target
) : targ(target
), fixupInfo(NULL
)
// Points the emitter at its output buffer.
//
// ptr  - start of the buffer; it is accessed as 32-bit words from here on.
// size - stored as codeSizeLimit; Program::emitBinary() passes the byte size
//        of the buffer it allocated.
183 CodeEmitter::setCodeLocation(void *ptr
, uint32_t size
)
185 code
= reinterpret_cast<uint32_t *>(ptr
);
187 codeSizeLimit
= size
;
// Dumps the emitted program through INFO as hexadecimal 32-bit words,
// preceded by its size in bytes.
191 CodeEmitter::printBinary() const
// The dump starts codeSize / 4 words before `code`, i.e. this assumes `code`
// has advanced by codeSize bytes since emission began.
193 uint32_t *bin
= code
- codeSize
/ 4;
194 INFO("program binary (%u bytes)", codeSize
);
195 for (unsigned int pos
= 0; pos
< codeSize
/ 4; ++pos
) {
198 INFO("%08x ", bin
[pos
]);
// Returns size divided by 56, rounded up: the number of 56-byte units needed
// to hold `size` bytes. prepareEmission(Program *) adds 8 bytes per returned
// unit to a block's size (presumably the scheduling word of each bundle).
203 static inline uint32_t sizeToBundlesNVE4(uint32_t size
)
205 return (size
+ 55) / 56;
// Lays out every function of the program before emission.
//
// Each function starts at the program size accumulated so far, is laid out by
// prepareEmission(Function *), and its final size is then added to
// prog->binSize. For targets with hasSWSched the block sizes are enlarged to
// make room for scheduling info and the function size is recomputed from the
// adjusted end position.
209 CodeEmitter::prepareEmission(Program
*prog
)
211 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
212 !fi
.end(); fi
.next()) {
213 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
// The function begins where the previously laid out code ends.
214 func
->binPos
= prog
->binSize
;
215 prepareEmission(func
);
217 // adjust sizes & positions for scheduling info:
218 if (prog
->getTarget()->hasSWSched
) {
219 uint32_t adjPos
= func
->binPos
;
220 BasicBlock
*bb
= NULL
;
221 for (int i
= 0; i
< func
->bbCount
; ++i
) {
222 bb
= func
->bbArray
[i
];
223 int32_t adjSize
= bb
->binSize
;
// Discount the bytes left up to the next 64-byte boundary from the size
// that is converted to bundles below.
225 adjSize
-= 64 - adjPos
% 64;
// New block size: original size plus 8 bytes per 56-byte unit.
229 adjSize
= bb
->binSize
+ sizeToBundlesNVE4(adjSize
) * 8;
231 bb
->binSize
= adjSize
;
// The function now spans from its start to the adjusted end position.
235 func
->binSize
= adjPos
- func
->binPos
;
238 prog
->binSize
+= func
->binSize
;
// Lays out the basic blocks of one function.
//
// Allocates func->bbArray with one slot per CFG node, places the CFG root
// block at the function's own binPos and then runs
// prepareEmission(BasicBlock *) on every block in CFG iterator order.
243 CodeEmitter::prepareEmission(Function
*func
)
246 func
->bbArray
= new BasicBlock
* [func
->cfg
.getSize()];
248 BasicBlock::get(func
->cfg
.getRoot())->binPos
= func
->binPos
;
250 for (IteratorRef it
= func
->cfg
.iteratorCFG(); !it
->end(); it
->next())
251 prepareEmission(BasicBlock::get(*it
));
// Places one basic block and determines the encoding size of its
// instructions.
//
// First part: walks backwards over the blocks already stored in
// func->bbArray to position bb directly after its predecessor in layout
// order, handling predecessors whose exit is a BRA to bb, then appends bb to
// func->bbArray.
// Second part: assigns encSize to every instruction, trying to pair up short
// encodings, and adds the resulting block size to func->binSize.
255 CodeEmitter::prepareEmission(BasicBlock
*bb
)
257 Instruction
*i
, *next
;
258 Function
*func
= bb
->getFunction();
// Skip trailing blocks that have no code (binSize == 0).
262 for (j
= func
->bbCount
- 1; j
>= 0 && !func
->bbArray
[j
]->binSize
; --j
);
264 for (; j
>= 0; --j
) {
265 BasicBlock
*in
= func
->bbArray
[j
];
266 Instruction
*exit
= in
->getExit();
// Predecessor in layout order ends with a branch straight to bb.
268 if (exit
&& exit
->op
== OP_BRA
&& exit
->asFlow()->target
.bb
== bb
) {
// Every block laid out after `in` moves up by 8 bytes.
272 for (++j
; j
< func
->bbCount
; ++j
)
273 func
->bbArray
[j
]->binPos
-= 8;
// bb starts where `in` ends.
277 bb
->binPos
= in
->binPos
+ in
->binSize
;
278 if (in
->binSize
) // no more no-op branches to bb
281 func
->bbArray
[func
->bbCount
++] = bb
;
286 // determine encoding size, try to group short instructions
288 for (i
= bb
->getEntry(); i
; i
= next
) {
// Special handling for MEMBAR when the target does not support it.
291 if (i
->op
== OP_MEMBAR
&& !targ
->isOpSupported(OP_MEMBAR
, TYPE_NONE
)) {
296 i
->encSize
= getMinEncodingSize(i
);
297 if (next
&& i
->encSize
< 8)
// An odd number of short instructions so far and the next one can also be
// encoded in 4 bytes: try to swap neighbours so the short ones end up
// adjacent, where that reordering is legal.
300 if ((nShort
& 1) && next
&& getMinEncodingSize(next
) == 4) {
301 if (i
->isCommutationLegal(i
->next
)) {
302 bb
->permuteAdjacent(i
, next
);
308 if (i
->isCommutationLegal(i
->prev
) && next
->next
) {
309 bb
->permuteAdjacent(i
->prev
, i
);
// Otherwise the previous instruction is widened to the 8-byte encoding.
316 i
->prev
->encSize
= 8;
323 i
->prev
->encSize
= 8;
328 bb
->binSize
+= i
->encSize
;
// A block's exit instruction is forced to the 8-byte encoding (the assert
// below relies on it).
331 if (bb
->getExit()->encSize
== 4) {
333 bb
->getExit()->encSize
= 8;
336 if ((bb
->getExit()->prev
->encSize
== 4) && !(nShort
& 1)) {
338 bb
->getExit()->prev
->encSize
= 8;
// A non-empty block must end in an exit instruction with 8-byte encoding.
341 assert(!bb
->getEntry() || (bb
->getExit() && bb
->getExit()->encSize
== 8));
343 func
->binSize
+= bb
->binSize
;
// Fills info->bin.syms with one entry per function, recording the function's
// label and its position in the binary, and stores the number of entries
// written in info->bin.numSyms.
347 Program::emitSymbolTable(struct nv50_ir_prog_info
*info
)
// nMax is the number of functions, used to size the symbol array.
349 unsigned int n
= 0, nMax
= allFuncs
.getSize();
// NOTE(review): no check of the MALLOC result is visible before the entries
// are written below - confirm allocation failure is handled.
352 (struct nv50_ir_prog_symbol
*)MALLOC(nMax
* sizeof(*info
->bin
.syms
));
354 for (ArrayList::Iterator fi
= allFuncs
.iterator();
357 Function
*f
= (Function
*)fi
.get();
360 info
->bin
.syms
[n
].label
= f
->getLabel();
361 info
->bin
.syms
[n
].offset
= f
->binPos
;
364 info
->bin
.numSyms
= n
;
// Generates the machine code of the whole program.
//
// Obtains the target's CodeEmitter for this program type, lets it compute the
// layout (prepareEmission), allocates a buffer of binSize bytes and emits
// every instruction of every function into it. Along the way it counts the
// emitted instructions in info->bin.instructions and sets info->io.fp64 when
// 64-bit floating point is used. Finally it hands the relocation data, fixup
// data and symbol table over to info.
368 Program::emitBinary(struct nv50_ir_prog_info
*info
)
370 CodeEmitter
*emit
= target
->getCodeEmitter(progType
);
372 emit
->prepareEmission(this);
374 if (dbgFlags
& NV50_IR_DEBUG_BASIC
)
381 code
= reinterpret_cast<uint32_t *>(MALLOC(binSize
));
384 emit
->setCodeLocation(code
, binSize
);
385 info
->bin
.instructions
= 0;
387 for (ArrayList::Iterator fi
= allFuncs
.iterator(); !fi
.end(); fi
.next()) {
388 Function
*fn
= reinterpret_cast<Function
*>(fi
.get());
// Each function must start exactly at the position assigned to it during
// prepareEmission().
390 assert(emit
->getCodeSize() == fn
->binPos
);
392 for (int b
= 0; b
< fn
->bbCount
; ++b
) {
393 for (Instruction
*i
= fn
->bbArray
[b
]->getEntry(); i
; i
= i
->next
) {
394 emit
->emitInstruction(i
);
395 info
->bin
.instructions
++;
// fp64 is flagged when either the source or destination type is 8 bytes
// wide and either of them is a float type.
396 if ((typeSizeof(i
->sType
) == 8 || typeSizeof(i
->dType
) == 8) &&
397 (isFloatType(i
->sType
) || isFloatType(i
->dType
)))
398 info
->io
.fp64
= true;
402 info
->bin
.relocData
= emit
->getRelocInfo();
403 info
->bin
.fixupData
= emit
->getFixupInfo();
405 emitSymbolTable(info
);
407 // the nvc0 driver will print the binary itself together with the header
408 if ((dbgFlags
& NV50_IR_DEBUG_BASIC
) && getTarget()->getChipset() < 0xc0)
// Number of entries by which addReloc() and addInterp() grow their entry
// arrays each time the current count reaches a multiple of this value.
415 #define RELOC_ALLOC_INCREMENT 8
// Appends a relocation entry for the code currently being emitted.
//
// ty   - relocation type, stored in the entry.
// w    - word index; the entry's offset is codeSize + w * 4.
// data - stored in the entry's data field.
// m    - stored in the entry's mask field.
// The entry's bitPos is taken from `s`.
418 CodeEmitter::addReloc(RelocEntry::Type ty
, int w
, uint32_t data
, uint32_t m
,
// Index of the new entry: the current entry count, or 0 if nothing has been
// allocated yet.
421 unsigned int n
= relocInfo
? relocInfo
->count
: 0;
// Grow the storage whenever the count reaches a multiple of the increment.
// REALLOC is given the old size (0 on the first allocation) and a new size
// with room for RELOC_ALLOC_INCREMENT more entries.
423 if (!(n
% RELOC_ALLOC_INCREMENT
)) {
424 size_t size
= sizeof(RelocInfo
) + n
* sizeof(RelocEntry
);
425 relocInfo
= reinterpret_cast<RelocInfo
*>(
426 REALLOC(relocInfo
, n
? size
: 0,
427 size
+ RELOC_ALLOC_INCREMENT
* sizeof(RelocEntry
)));
// Zero the RelocInfo header (presumably only on the first allocation).
431 memset(relocInfo
, 0, sizeof(RelocInfo
));
435 relocInfo
->entry
[n
].data
= data
;
436 relocInfo
->entry
[n
].mask
= m
;
437 relocInfo
->entry
[n
].offset
= codeSize
+ w
* 4;
438 relocInfo
->entry
[n
].bitPos
= s
;
439 relocInfo
->entry
[n
].type
= ty
;
// Appends a fixup entry for the instruction at the current emit position.
//
// The entry is built from the apply callback, ipa, reg and the current code
// position expressed in 32-bit words (codeSize >> 2). Storage grows the same
// way as in addReloc().
445 CodeEmitter::addInterp(int ipa
, int reg
, FixupApply apply
)
// Index of the new entry: the current entry count, or 0 if nothing has been
// allocated yet.
447 unsigned int n
= fixupInfo
? fixupInfo
->count
: 0;
// Grow the storage whenever the count reaches a multiple of the increment.
449 if (!(n
% RELOC_ALLOC_INCREMENT
)) {
450 size_t size
= sizeof(FixupInfo
) + n
* sizeof(FixupEntry
);
451 fixupInfo
= reinterpret_cast<FixupInfo
*>(
452 REALLOC(fixupInfo
, n
? size
: 0,
453 size
+ RELOC_ALLOC_INCREMENT
* sizeof(FixupEntry
)));
// Zero the FixupInfo header (presumably only on the first allocation).
457 memset(fixupInfo
, 0, sizeof(FixupInfo
));
461 fixupInfo
->entry
[n
] = FixupEntry(apply
, ipa
, reg
, codeSize
>> 2);
// Patches the 32-bit word of `binary` that this entry refers to.
//
// binary - code buffer to patch; the word at index offset / 4 is modified.
// info   - supplies the base positions selected by the entry's type.
467 RelocEntry::apply(uint32_t *binary
, const RelocInfo
*info
) const
// Select the base position that matches the relocation type.
472 case TYPE_CODE
: value
= info
->codePos
; break;
473 case TYPE_BUILTIN
: value
= info
->libPos
; break;
474 case TYPE_DATA
: value
= info
->dataPos
; break;
// A negative bitPos shifts the value right by its magnitude, otherwise the
// value is shifted left by bitPos.
480 value
= (bitPos
< 0) ? (value
>> -bitPos
) : (value
<< bitPos
);
// Replace only the bits covered by mask, leaving the rest of the word intact.
482 binary
[offset
/ 4] &= ~mask
;
483 binary
[offset
/ 4] |= value
& mask
;
486 } // namespace nv50_ir
489 #include "codegen/nv50_ir_driver.h"
// Applies all recorded relocations to a code buffer.
//
// relocData is interpreted as an nv50_ir::RelocInfo. The code, library and
// data base positions are stored in it first, then every entry patches `code`
// through RelocEntry::apply().
494 nv50_ir_relocate_code(void *relocData
, uint32_t *code
,
499 nv50_ir::RelocInfo
*info
= reinterpret_cast<nv50_ir::RelocInfo
*>(relocData
);
501 info
->codePos
= codePos
;
502 info
->libPos
= libPos
;
503 info
->dataPos
= dataPos
;
505 for (unsigned int i
= 0; i
< info
->count
; ++i
)
506 info
->entry
[i
].apply(code
, info
);
// Applies all recorded fixups to a code buffer.
//
// fixupData is interpreted as an nv50_ir::FixupInfo. The state flags are
// bundled into a FixupData object and every entry's apply callback is invoked
// with the entry itself, the code buffer and that data.
510 nv50_ir_apply_fixups(void *fixupData
, uint32_t *code
,
511 bool force_persample_interp
, bool flatshade
,
514 nv50_ir::FixupInfo
*info
= reinterpret_cast<nv50_ir::FixupInfo
*>(
517 // force_persample_interp: all non-flat -> per-sample
518 // flatshade: all color -> flat
519 // alphatest: PIPE_FUNC_* to use with alphatest
520 nv50_ir::FixupData
data(force_persample_interp
, flatshade
, alphatest
);
521 for (unsigned i
= 0; i
< info
->count
; ++i
)
522 info
->entry
[i
].apply(&info
->entry
[i
], code
, data
);
// Retrieves the built-in library code for a chipset.
//
// A temporary Target is created for the chipset, asked for its built-in code
// through getBuiltinCode(code, size), and destroyed again.
// NOTE(review): the result of Target::create() is used without a visible null
// check; create() has an "unsupported target" error path - confirm it cannot
// hand back a null Target here.
526 nv50_ir_get_target_library(uint32_t chipset
,
527 const uint32_t **code
, uint32_t *size
)
529 nv50_ir::Target
*targ
= nv50_ir::Target::create(chipset
);
530 targ
->getBuiltinCode(code
, size
);
531 nv50_ir::Target::destroy(targ
);