14e877b2518ad85b2f9c069526aaeeada8ebec3d
1 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code emitter outputs bytecode that is understood by the r600g driver
11 // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12 // except that the sizes of the instruction fields are rounded up to the
15 // [1] http://www.mesa3d.org/
17 //===----------------------------------------------------------------------===//
20 #include "AMDGPUCodeEmitter.h"
21 #include "AMDGPUInstrInfo.h"
22 #include "AMDILUtilityFunctions.h"
23 #include "R600InstrInfo.h"
24 #include "R600RegisterInfo.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/Support/DataTypes.h"
29 #include "llvm/Support/FormattedStream.h"
30 #include "llvm/Target/TargetMachine.h"
34 #define SRC_BYTE_COUNT 11
35 #define DST_BYTE_COUNT 5
41 class R600CodeEmitter
: public MachineFunctionPass
, public AMDGPUCodeEmitter
{
46 formatted_raw_ostream
&_OS
;
47 const TargetMachine
* TM
;
48 const MachineRegisterInfo
* MRI
;
49 const R600RegisterInfo
* TRI
;
50 const R600InstrInfo
* TII
;
54 unsigned currentElement
;
57 unsigned section_start
;
61 R600CodeEmitter(formatted_raw_ostream
&OS
) : MachineFunctionPass(ID
),
62 _OS(OS
), TM(NULL
), IsCube(false), IsVector(false),
// Returns the fixed, human-readable name of this pass as a string literal.
65 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
67 bool runOnMachineFunction(MachineFunction
&MF
);
68 virtual uint64_t getMachineOpValue(const MachineInstr
&MI
,
69 const MachineOperand
&MO
) const;
73 void EmitALUInstr(MachineInstr
&MI
);
74 void EmitSrc(const MachineOperand
& MO
, int chan_override
= -1);
75 void EmitDst(const MachineOperand
& MO
);
76 void EmitALU(MachineInstr
&MI
, unsigned numSrc
);
77 void EmitTexInstr(MachineInstr
&MI
);
78 void EmitFCInstr(MachineInstr
&MI
);
80 void EmitNullBytes(unsigned int byteCount
);
82 void EmitByte(unsigned int byte
);
84 void EmitTwoBytes(uint32_t bytes
);
86 void Emit(uint32_t value
);
87 void Emit(uint64_t value
);
89 unsigned getHWReg(unsigned regNo
) const;
93 } // End anonymous namespace
135 TEXTURE_SHADOW1D_ARRAY
,
136 TEXTURE_SHADOW2D_ARRAY
139 char R600CodeEmitter::ID
= 0;
141 FunctionPass
*llvm::createR600CodeEmitterPass(formatted_raw_ostream
&OS
) {
142 return new R600CodeEmitter(OS
);
145 bool R600CodeEmitter::runOnMachineFunction(MachineFunction
&MF
) {
147 TM
= &MF
.getTarget();
148 MRI
= &MF
.getRegInfo();
149 TRI
= static_cast<const R600RegisterInfo
*>(TM
->getRegisterInfo());
150 TII
= static_cast<const R600InstrInfo
*>(TM
->getInstrInfo());
151 const AMDGPUSubtarget
&STM
= TM
->getSubtarget
<AMDGPUSubtarget
>();
152 std::string gpu
= STM
.getDeviceName();
154 if (STM
.dumpCode()) {
158 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
160 MachineBasicBlock
&MBB
= *BB
;
161 for (MachineBasicBlock::instr_iterator I
= MBB
.instr_begin(),
162 E
= MBB
.instr_end(); I
!= E
; ++I
) {
163 MachineInstr
&MI
= *I
;
164 IsVector
= TII
->isVector(MI
);
165 IsCube
= TII
->isCubeOp(MI
.getOpcode());
166 if (MI
.getNumOperands() > 1 && MI
.getOperand(0).isReg() && MI
.getOperand(0).isDead()) {
169 if (TII
->isTexOp(MI
.getOpcode())) {
171 } else if (TII
->isFCOp(MI
.getOpcode())){
173 } else if (IsVector
|| IsCube
) {
175 // XXX: On Cayman, some (all?) of the vector instructions only need
176 // to fill the first three slots.
177 for (currentElement
= 0; currentElement
< 4; currentElement
++) {
178 IsLast
= (currentElement
== 3);
183 } else if (MI
.getOpcode() == AMDGPU::RETURN
||
184 MI
.getOpcode() == AMDGPU::BUNDLE
||
185 MI
.getOpcode() == AMDGPU::KILL
) {
188 switch(MI
.getOpcode()) {
189 case AMDGPU::RAT_WRITE_CACHELESS_eg
:
191 uint64_t inst
= getBinaryCodeForInstr(MI
);
192 // Set End Of Program bit
193 // XXX: Need better check of end of program. EOP should be
194 // encoded in one of the operands of the MI, and it should be
195 // set in a prior pass.
196 MachineBasicBlock::iterator NextI
= llvm::next(I
);
197 MachineInstr
&NextMI
= *NextI
;
198 if (NextMI
.getOpcode() == AMDGPU::RETURN
) {
199 inst
|= (((uint64_t)1) << 53);
201 EmitByte(INSTR_NATIVE
);
205 case AMDGPU::VTX_READ_PARAM_i32_eg
:
206 case AMDGPU::VTX_READ_PARAM_f32_eg
:
207 case AMDGPU::VTX_READ_GLOBAL_i32_eg
:
208 case AMDGPU::VTX_READ_GLOBAL_f32_eg
:
209 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg
:
210 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg
:
212 uint64_t InstWord01
= getBinaryCodeForInstr(MI
);
213 uint32_t InstWord2
= MI
.getOperand(2).getImm(); // Offset
231 void R600CodeEmitter::EmitALUInstr(MachineInstr
&MI
)
234 unsigned numOperands
= MI
.getNumExplicitOperands();
235 if(MI
.findFirstPredOperandIdx() > -1)
238 // Some instructions are just place holder instructions that represent
239 // operations that the GPU does automatically. They should be ignored.
240 if (TII
->isPlaceHolderOpcode(MI
.getOpcode())) {
244 if(MI
.getOpcode() == AMDGPU::PRED_X
)
247 // XXX Check if instruction writes a result
248 if (numOperands
< 1) {
251 const MachineOperand dstOp
= MI
.getOperand(0);
253 // Emit instruction type
257 static const int cube_src_swz
[] = {2, 2, 0, 1};
258 EmitSrc(MI
.getOperand(1), cube_src_swz
[currentElement
]);
259 EmitSrc(MI
.getOperand(1), cube_src_swz
[3-currentElement
]);
260 EmitNullBytes(SRC_BYTE_COUNT
);
262 unsigned int opIndex
;
263 for (opIndex
= 1; opIndex
< numOperands
; opIndex
++) {
264 // Literal constants are always stored as the last operand.
265 if (MI
.getOperand(opIndex
).isImm() || MI
.getOperand(opIndex
).isFPImm()) {
268 EmitSrc(MI
.getOperand(opIndex
));
271 // Emit zeros for unused sources
272 for ( ; opIndex
< 4; opIndex
++) {
273 EmitNullBytes(SRC_BYTE_COUNT
);
279 EmitALU(MI
, numOperands
- 1);
282 void R600CodeEmitter::EmitSrc(const MachineOperand
& MO
, int chan_override
)
285 // Emit the source select (2 bytes). For GPRs, this is the register index.
286 // For other potential instruction operands, (e.g. constant registers) the
287 // value of the source select is defined in the r600isa docs.
289 unsigned reg
= MO
.getReg();
290 EmitTwoBytes(getHWReg(reg
));
291 if (reg
== AMDGPU::ALU_LITERAL_X
) {
292 const MachineInstr
* parent
= MO
.getParent();
293 unsigned immOpIndex
= parent
->getNumExplicitOperands() - 1;
294 MachineOperand immOp
= parent
->getOperand(immOpIndex
);
295 if (immOp
.isFPImm()) {
296 value
= immOp
.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
298 assert(immOp
.isImm());
299 value
= immOp
.getImm();
303 // XXX: Handle other operand types.
307 // Emit the source channel (1 byte)
308 if (chan_override
!= -1) {
309 EmitByte(chan_override
);
310 } else if (MO
.isReg()) {
311 EmitByte(TRI
->getHWRegChan(MO
.getReg()));
316 // XXX: Emit isNegated (1 byte)
317 if ((!(MO
.getTargetFlags() & MO_FLAG_ABS
))
318 && (MO
.getTargetFlags() & MO_FLAG_NEG
||
320 (MO
.getReg() == AMDGPU::NEG_ONE
|| MO
.getReg() == AMDGPU::NEG_HALF
)))){
326 // Emit isAbsolute (1 byte)
327 if (MO
.getTargetFlags() & MO_FLAG_ABS
) {
333 // XXX: Emit relative addressing mode (1 byte)
336 // Emit kc_bank. This will be adjusted later by r600_asm.
339 // Emit the literal value, if applicable (4 bytes).
344 void R600CodeEmitter::EmitDst(const MachineOperand
& MO
)
346 if (MO
.isReg() && MO
.getReg() != AMDGPU::PREDICATE_BIT
) {
347 // Emit the destination register index (1 byte)
348 EmitByte(getHWReg(MO
.getReg()));
350 // Emit the element of the destination register (1 byte)
351 if (IsCube
|| IsVector
) {
352 EmitByte(currentElement
);
354 EmitByte(TRI
->getHWRegChan(MO
.getReg()));
357 // Emit isClamped (1 byte)
358 if (MO
.getTargetFlags() & MO_FLAG_CLAMP
) {
364 // Emit writemask (1 byte).
366 currentElement
!= TRI
->getHWRegChan(MO
.getReg()))
367 || MO
.getTargetFlags() & MO_FLAG_MASK
) {
373 // XXX: Emit relative addressing mode
376 // XXX: Handle other operand types. Are there any for destination regs?
377 EmitNullBytes(DST_BYTE_COUNT
);
381 void R600CodeEmitter::EmitALU(MachineInstr
&MI
, unsigned numSrc
)
383 // Emit the instruction (2 bytes)
384 EmitTwoBytes(getBinaryCodeForInstr(MI
));
386 // Emit IsLast (for this instruction group) (1 byte)
388 (MI
.isInsideBundle() &&
389 !(MI
.getOperand(0).getTargetFlags() & MO_FLAG_LAST
))) {
395 // Emit isOp3 (1 byte)
402 // XXX: Emit push modifier
403 if(MI
.getOperand(1).getTargetFlags() & MO_FLAG_PUSH
) {
409 // XXX: Emit predicate (1 byte)
410 int predidx
= MI
.findFirstPredOperandIdx();
412 switch(MI
.getOperand(predidx
).getReg()) {
413 case AMDGPU::PRED_SEL_ZERO
:
416 case AMDGPU::PRED_SEL_ONE
:
428 // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
429 // r600_asm.c sets it.
432 // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
435 // XXX: Emit OMOD (1 byte) Not implemented.
438 // XXX: Emit index_mode. I think this is for indirect addressing, so we
439 // don't need to worry about it.
443 void R600CodeEmitter::EmitTexInstr(MachineInstr
&MI
)
446 unsigned opcode
= MI
.getOpcode();
447 bool hasOffsets
= (opcode
== AMDGPU::TEX_LD
);
448 unsigned op_offset
= hasOffsets
? 3 : 0;
449 int64_t sampler
= MI
.getOperand(op_offset
+2).getImm();
450 int64_t textureType
= MI
.getOperand(op_offset
+3).getImm();
451 unsigned srcSelect
[4] = {0, 1, 2, 3};
453 // Emit instruction type
457 EmitByte(getBinaryCodeForInstr(MI
));
459 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
460 EmitByte(sampler
+ 1 + 1);
462 // Emit source register
463 EmitByte(getHWReg(MI
.getOperand(1).getReg()));
465 // XXX: Emit src isRelativeAddress
468 // Emit destination register
469 EmitByte(getHWReg(MI
.getOperand(0).getReg()));
471 // XXX: Emit dst isRelativeAddress
474 // XXX: Emit dst select
480 // XXX: Emit lod bias
483 // XXX: Emit coord types
484 unsigned coordType
[4] = {1, 1, 1, 1};
486 if (textureType
== TEXTURE_RECT
487 || textureType
== TEXTURE_SHADOWRECT
) {
488 coordType
[ELEMENT_X
] = 0;
489 coordType
[ELEMENT_Y
] = 0;
492 if (textureType
== TEXTURE_1D_ARRAY
493 || textureType
== TEXTURE_SHADOW1D_ARRAY
) {
494 if (opcode
== AMDGPU::TEX_SAMPLE_C_L
|| opcode
== AMDGPU::TEX_SAMPLE_C_LB
) {
495 coordType
[ELEMENT_Y
] = 0;
497 coordType
[ELEMENT_Z
] = 0;
498 srcSelect
[ELEMENT_Z
] = ELEMENT_Y
;
500 } else if (textureType
== TEXTURE_2D_ARRAY
501 || textureType
== TEXTURE_SHADOW2D_ARRAY
) {
502 coordType
[ELEMENT_Z
] = 0;
505 for (unsigned i
= 0; i
< 4; i
++) {
506 EmitByte(coordType
[i
]);
511 for (unsigned i
= 2; i
< 5; i
++)
512 EmitByte(MI
.getOperand(i
).getImm()<<1);
519 // XXX: Emit source select
520 if ((textureType
== TEXTURE_SHADOW1D
521 || textureType
== TEXTURE_SHADOW2D
522 || textureType
== TEXTURE_SHADOWRECT
523 || textureType
== TEXTURE_SHADOW1D_ARRAY
)
524 && opcode
!= AMDGPU::TEX_SAMPLE_C_L
525 && opcode
!= AMDGPU::TEX_SAMPLE_C_LB
) {
526 srcSelect
[ELEMENT_W
] = ELEMENT_Z
;
529 for (unsigned i
= 0; i
< 4; i
++) {
530 EmitByte(srcSelect
[i
]);
534 void R600CodeEmitter::EmitFCInstr(MachineInstr
&MI
)
536 // Emit instruction type
540 unsigned numOperands
= MI
.getNumOperands();
541 if (numOperands
> 0) {
542 assert(numOperands
== 1);
543 EmitSrc(MI
.getOperand(0));
545 EmitNullBytes(SRC_BYTE_COUNT
);
548 // Emit FC Instruction
550 switch (MI
.getOpcode()) {
551 case AMDGPU::BREAK_LOGICALZ_f32
:
554 case AMDGPU::BREAK_LOGICALNZ_f32
:
557 case AMDGPU::BREAK_LOGICALNZ_i32
:
558 instr
= FC_BREAK_NZ_INT
;
560 case AMDGPU::BREAK_LOGICALZ_i32
:
561 instr
= FC_BREAK_Z_INT
;
563 case AMDGPU::CONTINUE_LOGICALNZ_f32
:
564 case AMDGPU::CONTINUE_LOGICALNZ_i32
:
567 case AMDGPU::IF_LOGICALNZ_f32
:
569 case AMDGPU::IF_LOGICALNZ_i32
:
572 case AMDGPU::IF_LOGICALZ_f32
:
581 case AMDGPU::ENDLOOP
:
584 case AMDGPU::WHILELOOP
:
594 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount
)
596 for (unsigned int i
= 0; i
< byteCount
; i
++) {
// Writes a single byte to the output stream _OS.
//
// byte: value to emit; only its low 8 bits reach the stream.
//
// Note: the uint8_t cast binds more tightly than '&', so the value is
// truncated to 8 bits first and the subsequent '& 0xff' does not change it.
601 void R600CodeEmitter::EmitByte(unsigned int byte
)
603 _OS
.write((uint8_t) byte
& 0xff);
// Writes the low 16 bits of 'bytes' to the output stream _OS as two bytes,
// least-significant byte first. Bits above bit 15 are ignored.
605 void R600CodeEmitter::EmitTwoBytes(unsigned int bytes
)
// Low byte (bits 0-7).
607 _OS
.write((uint8_t) (bytes
& 0xff));
// High byte (bits 8-15).
608 _OS
.write((uint8_t) ((bytes
>> 8) & 0xff));
// Writes a 32-bit value to the output stream _OS as four bytes,
// least-significant byte first.
611 void R600CodeEmitter::Emit(uint32_t value
)
// Iteration i emits bits [8*i, 8*i + 7] of 'value'.
613 for (unsigned i
= 0; i
< 4; i
++) {
614 _OS
.write((uint8_t) ((value
>> (8 * i
)) & 0xff));
// Emits a 64-bit value as eight bytes, least-significant byte first.
// Unlike the 32-bit overload, each byte goes through EmitByte() rather than
// being written to _OS directly.
618 void R600CodeEmitter::Emit(uint64_t value
)
// Iteration i emits bits [8*i, 8*i + 7] of 'value'.
620 for (unsigned i
= 0; i
< 8; i
++) {
621 EmitByte((value
>> (8 * i
)) & 0xff);
625 unsigned R600CodeEmitter::getHWReg(unsigned regNo
) const
629 HWReg
= TRI
->getHWRegIndex(regNo
);
630 if (AMDGPU::R600_CReg32RegClass
.contains(regNo
)) {
636 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr
&MI
,
637 const MachineOperand
&MO
) const
640 return getHWReg(MO
.getReg());
646 #include "AMDGPUGenCodeEmitter.inc"