2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50/codegen/nv50_ir.h"
24 #include "nv50/codegen/nv50_ir_target.h"
// Constant table of uint8_t values declared with OP_LAST + 1 elements.
// The trailing comment on each row names the operations that the row's
// entries correspond to, in the same left-to-right order.
// NOTE(review): going by the identifier, each entry is presumably the number
// of source operands of that operation -- confirm against the declaration of
// Target. The rows shown here may not be the complete initializer, so do not
// derive an index from the row position alone.
28 const uint8_t Target::operationSrcNr
[OP_LAST
+ 1] =
31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
32 1, 1, 2, // MOV, LOAD, STORE
33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34 1, 1, 1, // ABS, NEG, NOT
35 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
36 2, 2, 1, // MAX, MIN, SAT
37 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
38 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
39 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2
40 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
42 0, 0, 0, // PRERET,CONT,BREAK
43 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
45 1, 1, // EMIT, RESTART
46 1, 1, 1, // TEX, TXB, TXL,
47 1, 1, 1, 1, 1, // TXF, TXQ, TXD, TXG, TEXCSAA
50 1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
51 2, 3, 2, // POPCNT, INSBF, EXTBF
// Declaration only: returns a Target* for the given chipset id. The
// definition is not part of the code visible here. Called by Target::create().
56 extern Target
*getTargetNVC0(unsigned int chipset
);
// Returns a Target for `chipset`.
//
// The selection switches on (chipset & 0xf0). One visible path forwards the
// full chipset value to getTargetNVC0(); another logs "unsupported target"
// through ERROR().
// NOTE(review): the case labels and whatever is returned after the ERROR()
// call are not visible here -- callers should confirm whether a null pointer
// can be returned for unsupported chipsets.
58 Target
*Target::create(unsigned int chipset
)
60 switch (chipset
& 0xf0) {
63 return getTargetNVC0(chipset
);
69 ERROR("unsupported target: NV%x\n", chipset
);
// Takes a Target pointer `targ`.
// NOTE(review): the body is not visible here; presumably this releases a
// Target obtained from Target::create() -- confirm before relying on it.
74 void Target::destroy(Target
*targ
)
// Points the emitter's `code` member at `ptr`, reinterpreted as a pointer to
// 32-bit words.
//
// ptr:  start of the output buffer.
// size: NOTE(review): its use is not visible here; presumably the capacity of
//       the buffer -- confirm the unit (bytes vs. words) against the caller.
80 CodeEmitter::setCodeLocation(void *ptr
, uint32_t size
)
82 code
= reinterpret_cast<uint32_t *>(ptr
);
// Dumps the emitted program through INFO(): first a header with the size in
// bytes (codeSize), then every 32-bit word as 8 hex digits.
//
// The dump starts at `code - codeSize / 4`, i.e. `code` is treated as pointing
// just past the last emitted word and codeSize as a byte count (divided by 4
// to get a word count).
88 CodeEmitter::printBinary() const
90 uint32_t *bin
= code
- codeSize
/ 4;
91 INFO("program binary (%u bytes)", codeSize
);
// One iteration per 32-bit word of the emitted code.
92 for (unsigned int pos
= 0; pos
< codeSize
/ 4; ++pos
) {
95 INFO("%08x ", bin
[pos
]);
// Program-level layout pass: assigns each function its position in the
// program binary and accumulates the total program size.
//
// For every entry of prog->allFuncs, in iteration order:
//   - func->binPos is set to the program size accumulated so far,
//   - the Function overload of prepareEmission() is run on it,
//   - func->binSize is then added to prog->binSize.
// Functions are therefore placed back to back in iteration order.
101 CodeEmitter::prepareEmission(Program
*prog
)
103 for (ArrayList::Iterator fi
= prog
->allFuncs
.iterator();
104 !fi
.end(); fi
.next()) {
105 Function
*func
= reinterpret_cast<Function
*>(fi
.get());
// The function starts where the previously laid out functions end.
106 func
->binPos
= prog
->binSize
;
107 prepareEmission(func
);
108 prog
->binSize
+= func
->binSize
;
// Function-level layout pass.
//
// Allocates func->bbArray with one BasicBlock* slot per CFG node
// (func->cfg.getSize()), gives the CFG root block the function's own binPos,
// and then runs the BasicBlock overload of prepareEmission() on every node
// yielded by func->cfg.iteratorCFG().
// NOTE(review): bbArray is allocated with new[]; where it is released is not
// visible here.
113 CodeEmitter::prepareEmission(Function
*func
)
116 func
->bbArray
= new BasicBlock
* [func
->cfg
.getSize()];
// The root block begins at the function's start position.
118 BasicBlock::get(func
->cfg
.getRoot())->binPos
= func
->binPos
;
120 Graph::GraphIterator
*iter
;
121 for (iter
= func
->cfg
.iteratorCFG(); !iter
->end(); iter
->next())
122 prepareEmission(BasicBlock::get(*iter
));
// Hand the iterator back to the graph once the traversal is finished.
123 func
->cfg
.putIterator(iter
);
// Basic-block-level layout pass: determines bb's position relative to the
// blocks already recorded in func->bbArray, chooses an encoding size for each
// of its instructions, and adds the block's size to func->binSize.
//
// Visible steps:
//   1. Scan func->bbArray backwards, first skipping trailing blocks whose
//      binSize is zero, then examining earlier blocks whose exit instruction
//      is an OP_BRA that targets bb.
//   2. Set bb->binPos to the end of a preceding block (binPos + binSize) and
//      append bb to func->bbArray.
//   3. Walk bb's instructions, starting each at getMinEncodingSize() and
//      either swapping adjacent instructions or widening an instruction to 8
//      so that short encodings can be grouped.
//   4. Ensure a non-empty block ends on an instruction with encSize == 8
//      (checked by the assert) and add bb->binSize to func->binSize.
// NOTE(review): several statements of this function are not visible here
// (declaration of j and nShort, else-branches, loop exits). The comments
// below describe only the visible lines; statement order matters in this
// routine, so consult the complete source before changing it.
127 CodeEmitter::prepareEmission(BasicBlock
*bb
)
129 Instruction
*i
, *next
;
130 Function
*func
= bb
->getFunction();
// Empty-bodied loop: step j down past trailing blocks with binSize == 0.
134 for (j
= func
->bbCount
- 1; j
>= 0 && !func
->bbArray
[j
]->binSize
; --j
);
// Continue backwards from there over the remaining recorded blocks.
136 for (; j
>= 0; --j
) {
137 BasicBlock
*in
= func
->bbArray
[j
];
138 Instruction
*exit
= in
->getExit();
// Preceding block ends in a branch whose target is the block being placed.
140 if (exit
&& exit
->op
== OP_BRA
&& exit
->asFlow()->target
.bb
== bb
) {
// Move every later recorded block 8 bytes earlier.
// NOTE(review): 8 is presumably the size of the branch being dropped; the
// statements that drop it are not visible here.
144 for (++j
; j
< func
->bbCount
; ++j
)
145 func
->bbArray
[j
]->binPos
-= 8;
// bb starts directly after `in`.
149 bb
->binPos
= in
->binPos
+ in
->binSize
;
150 if (in
->binSize
) // no more no-op branches to bb
// Record bb as the next block in emission order.
153 func
->bbArray
[func
->bbCount
++] = bb
;
158 // determine encoding size, try to group short instructions
160 for (i
= bb
->getEntry(); i
; i
= next
) {
// Start from the smallest encoding the emitter reports for i.
163 i
->encSize
= getMinEncodingSize(i
);
164 if (next
&& i
->encSize
< 8)
// Odd count of short instructions so far and the following one could be 4
// bytes: try to reorder so the short instructions end up adjacent.
167 if ((nShort
& 1) && next
&& getMinEncodingSize(next
) == 4) {
168 if (i
->isCommutationLegal(i
->next
)) {
169 bb
->permuteAdjacent(i
, next
);
175 if (i
->isCommutationLegal(i
->prev
) && next
->next
) {
176 bb
->permuteAdjacent(i
->prev
, i
);
// Widen the previous instruction to 8.
// NOTE(review): the conditions guarding these two assignments are not
// visible here.
183 i
->prev
->encSize
= 8;
190 i
->prev
->encSize
= 8;
195 bb
->binSize
+= i
->encSize
;
// The block's exit instruction is widened to 8 if it was encoded as 4.
198 if (bb
->getExit()->encSize
== 4) {
200 bb
->getExit()->encSize
= 8;
// Likewise for the instruction before the exit when the short count is even.
203 if ((bb
->getExit()->prev
->encSize
== 4) && !(nShort
& 1)) {
205 bb
->getExit()->prev
->encSize
= 8;
// Invariant: an empty block, or a block whose exit has encSize 8.
208 assert(!bb
->getEntry() || (bb
->getExit() && bb
->getExit()->encSize
== 8));
210 func
->binSize
+= bb
->binSize
;
// Emits the machine code for this program.
//
// Visible steps:
//   1. Obtain a CodeEmitter from `target` for this program's progType.
//   2. Run the emitter's prepareEmission() on the whole program.
//   3. Allocate binSize bytes with MALLOC for `code` and hand that buffer to
//      the emitter via setCodeLocation().
//   4. For every function in allFuncs, assert that the emitter's current code
//      size equals the function's binPos, then emit each instruction of each
//      block in fn->bbArray order.
//   5. Store the emitter's relocation info in info->bin.relocData.
// NOTE(review): handling of a failed MALLOC, the statement guarded by the
// debug flag check, and the return value are not visible here.
214 Program::emitBinary(struct nv50_ir_prog_info
*info
)
216 CodeEmitter
*emit
= target
->getCodeEmitter(progType
);
218 emit
->prepareEmission(this);
220 if (dbgFlags
& NV50_IR_DEBUG_BASIC
)
227 code
= reinterpret_cast<uint32_t *>(MALLOC(binSize
));
230 emit
->setCodeLocation(code
, binSize
);
232 for (ArrayList::Iterator fi
= allFuncs
.iterator(); !fi
.end(); fi
.next()) {
233 Function
*fn
= reinterpret_cast<Function
*>(fi
.get());
// The emitter must have produced exactly the bytes preceding this function.
235 assert(emit
->getCodeSize() == fn
->binPos
);
237 for (int b
= 0; b
< fn
->bbCount
; ++b
)
238 for (Instruction
*i
= fn
->bbArray
[b
]->getEntry(); i
; i
= i
->next
)
239 emit
->emitInstruction(i
);
241 info
->bin
.relocData
= emit
->getRelocInfo();
// Number of RelocEntry slots added each time the relocation table is grown.
247 #define RELOC_ALLOC_INCREMENT 8
// Appends one relocation entry to relocInfo.
//
// ty:   entry type, stored in entry[n].type.
// w:    word index; the entry's offset is codeSize + w * 4.
// data: stored in entry[n].data.
// m:    stored in entry[n].mask.
// NOTE(review): the remainder of the parameter list (including `s`, which is
// stored in entry[n].bitPos) is not visible here.
//
// Storage grows in steps of RELOC_ALLOC_INCREMENT entries: whenever the
// current count n is a multiple of the increment, the block is REALLOCed to
// hold sizeof(RelocInfo) + (n + RELOC_ALLOC_INCREMENT) entries.
// NOTE(review): the REALLOC failure path, the condition around the memset,
// the update of relocInfo->count and the return value are not visible here.
250 CodeEmitter::addReloc(RelocEntry::Type ty
, int w
, uint32_t data
, uint32_t m
,
// n is the index of the new entry; 0 when no table exists yet.
253 unsigned int n
= relocInfo
? relocInfo
->count
: 0;
255 if (!(n
% RELOC_ALLOC_INCREMENT
)) {
256 size_t size
= sizeof(RelocInfo
) + n
* sizeof(RelocEntry
);
// The old size is passed as 0 when n == 0.
257 relocInfo
= reinterpret_cast<RelocInfo
*>(
258 REALLOC(relocInfo
, n
? size
: 0,
259 size
+ RELOC_ALLOC_INCREMENT
* sizeof(RelocEntry
)));
// Zero the RelocInfo header.
263 memset(relocInfo
, 0, sizeof(RelocInfo
));
267 relocInfo
->entry
[n
].data
= data
;
268 relocInfo
->entry
[n
].mask
= m
;
// Byte offset of the word to patch: current code size plus w words.
269 relocInfo
->entry
[n
].offset
= codeSize
+ w
* 4;
270 relocInfo
->entry
[n
].bitPos
= s
;
271 relocInfo
->entry
[n
].type
= ty
;
// Applies this relocation entry to `binary`.
//
// binary: code words to patch; the word at index offset / 4 is modified.
// info:   supplies the base positions codePos, libPos and dataPos.
//
// A base value is chosen by case label (TYPE_CODE, TYPE_BUILTIN, TYPE_DATA),
// shifted by bitPos (right by -bitPos when bitPos is negative, left
// otherwise), and written into the bits selected by `mask`; bits outside the
// mask keep their previous contents.
// NOTE(review): the switch header, its default case and any statements
// between the switch and the shift are not visible here, so `value` may be
// adjusted further (e.g. by this entry's `data`) before it is shifted.
277 RelocEntry::apply(uint32_t *binary
, const RelocInfo
*info
) const
282 case TYPE_CODE
: value
= info
->codePos
; break;
283 case TYPE_BUILTIN
: value
= info
->libPos
; break;
284 case TYPE_DATA
: value
= info
->dataPos
; break;
// Negative bitPos means shift right, otherwise shift left.
290 value
= (bitPos
< 0) ? (value
>> -bitPos
) : (value
<< bitPos
);
// Clear the masked field, then insert the new bits.
292 binary
[offset
/ 4] &= ~mask
;
293 binary
[offset
/ 4] |= value
& mask
;
296 } // namespace nv50_ir
299 #include "nv50/codegen/nv50_ir_driver.h"
// Applies every recorded relocation to `code`.
//
// relocData: opaque pointer that is reinterpreted as nv50_ir::RelocInfo.
// code:      code words to patch.
// NOTE(review): the rest of the parameter list is not visible here; the body
// reads codePos, libPos and dataPos, which presumably are the remaining
// parameters.
//
// The three positions are stored into the RelocInfo first, then each of its
// `count` entries is applied in index order.
304 nv50_ir_relocate_code(void *relocData
, uint32_t *code
,
309 nv50_ir::RelocInfo
*info
= reinterpret_cast<nv50_ir::RelocInfo
*>(relocData
);
311 info
->codePos
= codePos
;
312 info
->libPos
= libPos
;
313 info
->dataPos
= dataPos
;
315 for (unsigned int i
= 0; i
< info
->count
; ++i
)
316 info
->entry
[i
].apply(code
, info
);
// Retrieves the target's builtin code for `chipset`.
//
// chipset: chipset id passed to nv50_ir::Target::create().
// code:    forwarded to Target::getBuiltinCode().
// size:    forwarded to Target::getBuiltinCode().
//
// A Target is created only for the duration of the query and is passed to
// Target::destroy() afterwards.
// NOTE(review): `targ` is dereferenced without a null check, while
// Target::create() has an "unsupported target" path -- confirm that create()
// cannot return null for the chipsets that reach this function.
320 nv50_ir_get_target_library(uint32_t chipset
,
321 const uint32_t **code
, uint32_t *size
)
323 nv50_ir::Target
*targ
= nv50_ir::Target::create(chipset
);
324 targ
->getBuiltinCode(code
, size
);
325 nv50_ir::Target::destroy(targ
);