1 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code emitter outputs bytecode that is understood by the r600g driver
11 // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12 // except that each instruction field's size is rounded up to a whole byte.
15 // [1] http://www.mesa3d.org/
17 //===----------------------------------------------------------------------===//
20 #include "AMDGPUUtil.h"
21 #include "AMDILCodeEmitter.h"
22 #include "AMDILInstrInfo.h"
23 #include "AMDILUtilityFunctions.h"
24 #include "R600InstrInfo.h"
25 #include "R600RegisterInfo.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Support/FormattedStream.h"
31 #include "llvm/Target/TargetMachine.h"
35 #define SRC_BYTE_COUNT 11
36 #define DST_BYTE_COUNT 5
// Machine-function pass that serialises R600 machine instructions as a stream
// of bytes written to a formatted_raw_ostream. It derives from both
// MachineFunctionPass and AMDILCodeEmitter.
42 class R600CodeEmitter
: public MachineFunctionPass
, public AMDILCodeEmitter
{
// Output stream that receives every emitted byte.
47 formatted_raw_ostream
&_OS
;
// Target machine; initialised to NULL and set in runOnMachineFunction().
48 const TargetMachine
* TM
;
// Register info of the function being emitted; set in runOnMachineFunction().
49 const MachineRegisterInfo
* MRI
;
// R600 register info, used to look up hardware register indices and channels.
50 const R600RegisterInfo
* TRI
;
// Slot (0-3) currently being emitted while a reduction, vector or cube
// instruction is expanded into four ALU slots.
55 unsigned currentElement
;
58 unsigned section_start
;
// Builds a pass that writes its output to OS.
62 R600CodeEmitter(formatted_raw_ostream
&OS
) : MachineFunctionPass(ID
),
63 _OS(OS
), TM(NULL
), IsCube(false), IsReduction(false), IsVector(false),
66 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
// Pass entry point: emits every instruction of MF.
68 bool runOnMachineFunction(MachineFunction
&MF
);
69 virtual uint64_t getMachineOpValue(const MachineInstr
&MI
,
70 const MachineOperand
&MO
) const;
// Per-instruction-class emitters.
74 void EmitALUInstr(MachineInstr
&MI
);
// chan_override == -1 (the default) means "use the operand's own channel".
75 void EmitSrc(const MachineOperand
& MO
, int chan_override
= -1);
76 void EmitDst(const MachineOperand
& MO
);
77 void EmitALU(MachineInstr
&MI
, unsigned numSrc
);
78 void EmitTexInstr(MachineInstr
&MI
);
79 void EmitFCInstr(MachineInstr
&MI
);
// Low-level byte writers; multi-byte values go out least-significant byte
// first.
81 void EmitNullBytes(unsigned int byteCount
);
83 void EmitByte(unsigned int byte
);
85 void EmitTwoBytes(uint32_t bytes
);
87 void Emit(uint32_t value
);
88 void Emit(uint64_t value
);
// Translates an LLVM register number into the hardware register index.
90 unsigned getHWReg(unsigned regNo
) const;
94 } // End anonymous namespace
135 TEXTURE_SHADOW1D_ARRAY
,
136 TEXTURE_SHADOW2D_ARRAY
// Pass identifier handed to the MachineFunctionPass base-class constructor.
139 char R600CodeEmitter::ID
= 0;
// Factory for the code emitter pass. Heap-allocates a new R600CodeEmitter that
// writes to OS and returns it as a FunctionPass pointer.
141 FunctionPass
*llvm::createR600CodeEmitterPass(formatted_raw_ostream
&OS
) {
142 return new R600CodeEmitter(OS
);
// Pass entry point. Walks every basic block of MF and every instruction in
// it. For each instruction the IsReduction / IsVector / IsCube flags are
// recomputed, and the instruction is then routed by opcode class: texture,
// flow control, four-slot (reduction/vector/cube) ALU, RETURN/BUNDLE/KILL, or
// the natively encoded RAT write and vertex-fetch opcodes handled in the
// switch.
145 bool R600CodeEmitter::runOnMachineFunction(MachineFunction
&MF
) {
// Cache the target, register and instruction info for this function.
147 TM
= &MF
.getTarget();
148 MRI
= &MF
.getRegInfo();
149 TRI
= static_cast<const R600RegisterInfo
*>(TM
->getRegisterInfo());
150 const R600InstrInfo
* TII
= static_cast<const R600InstrInfo
*>(TM
->getInstrInfo());
151 const AMDILSubtarget
&STM
= TM
->getSubtarget
<AMDILSubtarget
>();
152 std::string gpu
= STM
.getDeviceName();
154 if (STM
.dumpCode()) {
158 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
160 MachineBasicBlock
&MBB
= *BB
;
161 for (MachineBasicBlock::iterator I
= MBB
.begin(), E
= MBB
.end();
163 MachineInstr
&MI
= *I
;
// Classify the instruction; EmitSrc/EmitDst consult these flags.
164 IsReduction
= AMDGPU::isReductionOp(MI
.getOpcode());
165 IsVector
= TII
->isVector(MI
);
166 IsCube
= AMDGPU::isCubeOp(MI
.getOpcode());
// NOTE(review): presumably instructions whose register result is dead are
// skipped here -- confirm.
167 if (MI
.getNumOperands() > 1 && MI
.getOperand(0).isReg() && MI
.getOperand(0).isDead()) {
170 if (AMDGPU::isTexOp(MI
.getOpcode())) {
172 } else if (AMDGPU::isFCOp(MI
.getOpcode())){
174 } else if (IsReduction
|| IsVector
|| IsCube
) {
176 // XXX: On Cayman, some (all?) of the vector instructions only need
177 // to fill the first three slots.
// Expand into four slots; currentElement selects the slot being emitted.
178 for (currentElement
= 0; currentElement
< 4; currentElement
++) {
// Only the fourth slot (element 3) is marked as the last one.
179 IsLast
= (currentElement
== 3);
185 } else if (MI
.getOpcode() == AMDGPU::RETURN
||
186 MI
.getOpcode() == AMDGPU::BUNDLE
||
187 MI
.getOpcode() == AMDGPU::KILL
) {
190 switch(MI
.getOpcode()) {
191 case AMDGPU::RAT_WRITE_CACHELESS_eg
:
193 uint64_t inst
= getBinaryCodeForInstr(MI
);
194 // Set End Of Program bit
195 // XXX: Need better check of end of program. EOP should be
196 // encoded in one of the operands of the MI, and it should be
197 // set in a prior pass.
// NOTE(review): NextI is dereferenced without being compared against
// MBB.end(); this relies on a RAT write never being the last instruction of
// its block -- confirm.
198 MachineBasicBlock::iterator NextI
= llvm::next(I
);
199 MachineInstr
&NextMI
= *NextI
;
200 if (NextMI
.getOpcode() == AMDGPU::RETURN
) {
// Bit 53 of the 64-bit instruction word carries the flag.
201 inst
|= (((uint64_t)1) << 53);
203 EmitByte(INSTR_NATIVE
);
207 case AMDGPU::VTX_READ_PARAM_i32_eg
:
208 case AMDGPU::VTX_READ_PARAM_f32_eg
:
209 case AMDGPU::VTX_READ_GLOBAL_i32_eg
:
210 case AMDGPU::VTX_READ_GLOBAL_f32_eg
:
211 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg
:
212 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg
:
// Words 0-1 come from the generated encoder; word 2 is taken from the
// immediate in operand 2.
214 uint64_t InstWord01
= getBinaryCodeForInstr(MI
);
215 uint32_t InstWord2
= MI
.getOperand(2).getImm(); // Offset
// Emits one ALU instruction. Operand 0 is read as the destination. The
// remaining explicit operands are emitted as sources with EmitSrc, unused
// source slots are padded with SRC_BYTE_COUNT null bytes each, and the
// instruction is finished with EmitALU(MI, numOperands - 1).
233 void R600CodeEmitter::EmitALUInstr(MachineInstr
&MI
)
236 unsigned numOperands
= MI
.getNumExplicitOperands();
238 // Some instructions are just place holder instructions that represent
239 // operations that the GPU does automatically. They should be ignored.
240 if (AMDGPU::isPlaceHolderOpcode(MI
.getOpcode())) {
244 // XXX Check if instruction writes a result
245 if (numOperands
< 1) {
248 const MachineOperand dstOp
= MI
.getOperand(0);
250 // Emit instruction type
// Channel table for cube instructions: the first source uses entry
// [currentElement], the second uses entry [3 - currentElement]. Both sources
// read operand 1, and a third, empty source slot follows.
254 static const int cube_src_swz
[] = {2, 2, 0, 1};
255 EmitSrc(MI
.getOperand(1), cube_src_swz
[currentElement
]);
256 EmitSrc(MI
.getOperand(1), cube_src_swz
[3-currentElement
]);
257 EmitNullBytes(SRC_BYTE_COUNT
);
// opIndex is declared outside the loop so the padding loop below can continue
// from where source emission stopped.
259 unsigned int opIndex
;
260 for (opIndex
= 1; opIndex
< numOperands
; opIndex
++) {
261 // Literal constants are always stored as the last operand.
262 if (MI
.getOperand(opIndex
).isImm() || MI
.getOperand(opIndex
).isFPImm()) {
265 EmitSrc(MI
.getOperand(opIndex
));
268 // Emit zeros for unused sources
269 for ( ; opIndex
< 4; opIndex
++) {
270 EmitNullBytes(SRC_BYTE_COUNT
);
// The source count passed on excludes the destination operand.
276 EmitALU(MI
, numOperands
- 1);
// Emits one source operand.
//
// MO            - the operand to encode.
// chan_override - when not -1, this value is written as the source channel
//                 byte instead of a channel derived from the operand.
279 void R600CodeEmitter::EmitSrc(const MachineOperand
& MO
, int chan_override
)
282 // Emit the source select (2 bytes). For GPRs, this is the register index.
283 // For other potential instruction operands, (e.g. constant registers) the
284 // value of the source select is defined in the r600isa docs.
286 unsigned reg
= MO
.getReg();
287 EmitTwoBytes(getHWReg(reg
));
// ALU_LITERAL_X marks a literal source: the value is read from the parent
// instruction's last explicit operand.
288 if (reg
== AMDGPU::ALU_LITERAL_X
) {
289 const MachineInstr
* parent
= MO
.getParent();
290 unsigned immOpIndex
= parent
->getNumExplicitOperands() - 1;
291 MachineOperand immOp
= parent
->getOperand(immOpIndex
);
// A floating-point immediate is encoded through its raw bit pattern.
292 if (immOp
.isFPImm()) {
293 value
= immOp
.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
295 assert(immOp
.isImm());
296 value
= immOp
.getImm();
300 // XXX: Handle other operand types.
304 // Emit the source channel (1 byte)
// Priority: explicit override, then the current slot for reduction
// instructions, then the register's own hardware channel.
305 if (chan_override
!= -1) {
306 EmitByte(chan_override
);
307 } else if (IsReduction
) {
308 EmitByte(currentElement
);
309 } else if (MO
.isReg()) {
310 EmitByte(TRI
->getHWRegChan(MO
.getReg()));
315 // XXX: Emit isNegated (1 byte)
// The negate condition requires the ABS flag to be clear, plus either the NEG
// flag or one of the NEG_ONE / NEG_HALF registers.
316 if ((!(MO
.getTargetFlags() & MO_FLAG_ABS
))
317 && (MO
.getTargetFlags() & MO_FLAG_NEG
||
319 (MO
.getReg() == AMDGPU::NEG_ONE
|| MO
.getReg() == AMDGPU::NEG_HALF
)))){
325 // Emit isAbsolute (1 byte)
326 if (MO
.getTargetFlags() & MO_FLAG_ABS
) {
332 // XXX: Emit relative addressing mode (1 byte)
335 // Emit kc_bank, This will be adjusted later by r600_asm
338 // Emit the literal value, if applicable (4 bytes).
// Emits the destination operand MO: register index, destination element,
// clamp flag and write mask, one byte each per the field comments below.
// DST_BYTE_COUNT null bytes are emitted for the operand kinds covered by the
// final XXX comment.
343 void R600CodeEmitter::EmitDst(const MachineOperand
& MO
)
346 // Emit the destination register index (1 byte)
347 EmitByte(getHWReg(MO
.getReg()));
349 // Emit the element of the destination register (1 byte)
// Four-slot instructions write the element of the slot being emitted; all
// others use the register's own hardware channel.
350 if (IsReduction
|| IsCube
|| IsVector
) {
351 EmitByte(currentElement
);
353 EmitByte(TRI
->getHWRegChan(MO
.getReg()));
356 // Emit isClamped (1 byte)
357 if (MO
.getTargetFlags() & MO_FLAG_CLAMP
) {
363 // Emit writemask (1 byte).
// Condition is true when MO_FLAG_MASK is set, or when a reduction/vector
// instruction is emitting a slot other than the destination register's own
// channel.
364 if (((IsReduction
|| IsVector
) &&
365 currentElement
!= TRI
->getHWRegChan(MO
.getReg()))
366 || MO
.getTargetFlags() & MO_FLAG_MASK
) {
372 // XXX: Emit relative addressing mode
375 // XXX: Handle other operand types. Are there any for destination regs?
376 EmitNullBytes(DST_BYTE_COUNT
);
// Emits the ALU-specific part of an instruction, starting with the two-byte
// opcode obtained from getBinaryCodeForInstr(MI). The comments below list the
// one-byte fields that follow it, in order.
//
// MI     - the instruction being encoded.
// numSrc - number of source operands (the caller passes the explicit operand
//          count minus one).
380 void R600CodeEmitter::EmitALU(MachineInstr
&MI
, unsigned numSrc
)
382 // Emit the instruction (2 bytes)
383 EmitTwoBytes(getBinaryCodeForInstr(MI
));
385 // Emit IsLast (for this instruction group) (1 byte)
391 // Emit isOp3 (1 byte)
398 // XXX: Emit predicate (1 byte)
401 // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
402 // r600_asm.c sets it.
405 // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
408 // XXX: Emit OMOD (1 byte) Not implemented.
411 // XXX: Emit index_mode. I think this is for indirect addressing, so we
412 // don't need to worry about it.
// Emits a texture instruction. The sampler and texture type are immediates in
// operands 2 and 3, or in operands 5 and 6 for TEX_LD, whose operand positions
// are shifted by three. The texture type adjusts the per-coordinate type
// bytes and the source select (swizzle) bytes.
416 void R600CodeEmitter::EmitTexInstr(MachineInstr
&MI
)
419 unsigned opcode
= MI
.getOpcode();
// TEX_LD has three additional operands before the sampler / texture type.
420 bool hasOffsets
= (opcode
== AMDGPU::TEX_LD
);
421 unsigned op_offset
= hasOffsets
? 3 : 0;
422 int64_t sampler
= MI
.getOperand(op_offset
+2).getImm();
423 int64_t textureType
= MI
.getOperand(op_offset
+3).getImm();
// Identity swizzle by default; entries are overridden below.
424 unsigned srcSelect
[4] = {0, 1, 2, 3};
426 // Emit instruction type
430 EmitByte(getBinaryCodeForInstr(MI
));
432 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
// NOTE(review): the code emits sampler + 2, which does not match the
// "sampler + 1" mentioned above -- confirm which one is intended.
433 EmitByte(sampler
+ 1 + 1);
435 // Emit source register
436 EmitByte(getHWReg(MI
.getOperand(1).getReg()));
438 // XXX: Emit src isRelativeAddress
441 // Emit destination register
442 EmitByte(getHWReg(MI
.getOperand(0).getReg()));
444 // XXX: Emit dst isRelativeAddress
447 // XXX: Emit dst select
453 // XXX: Emit lod bias
456 // XXX: Emit coord types
// Every coordinate starts with type 1; selected entries are cleared to 0
// depending on the texture type.
457 unsigned coordType
[4] = {1, 1, 1, 1};
459 if (textureType
== TEXTURE_RECT
460 || textureType
== TEXTURE_SHADOWRECT
) {
461 coordType
[ELEMENT_X
] = 0;
462 coordType
[ELEMENT_Y
] = 0;
465 if (textureType
== TEXTURE_1D_ARRAY
466 || textureType
== TEXTURE_SHADOW1D_ARRAY
) {
467 if (opcode
== AMDGPU::TEX_SAMPLE_C_L
|| opcode
== AMDGPU::TEX_SAMPLE_C_LB
) {
468 coordType
[ELEMENT_Y
] = 0;
470 coordType
[ELEMENT_Z
] = 0;
// The Z source slot is read from the Y element.
471 srcSelect
[ELEMENT_Z
] = ELEMENT_Y
;
473 } else if (textureType
== TEXTURE_2D_ARRAY
474 || textureType
== TEXTURE_SHADOW2D_ARRAY
) {
475 coordType
[ELEMENT_Z
] = 0;
478 for (unsigned i
= 0; i
< 4; i
++) {
479 EmitByte(coordType
[i
]);
// Operands 2..4 are emitted as immediates shifted left by one bit.
// NOTE(review): presumably these are the TEX_LD offset operands (see
// hasOffsets) -- confirm.
484 for (unsigned i
= 2; i
< 5; i
++)
485 EmitByte(MI
.getOperand(i
).getImm()<<1);
492 // XXX:Emit source select
// For the listed shadow texture types, except with the TEX_SAMPLE_C_L / _LB
// opcodes, the W source slot is read from the Z element.
493 if ((textureType
== TEXTURE_SHADOW1D
494 || textureType
== TEXTURE_SHADOW2D
495 || textureType
== TEXTURE_SHADOWRECT
496 || textureType
== TEXTURE_SHADOW1D_ARRAY
)
497 && opcode
!= AMDGPU::TEX_SAMPLE_C_L
498 && opcode
!= AMDGPU::TEX_SAMPLE_C_LB
) {
499 srcSelect
[ELEMENT_W
] = ELEMENT_Z
;
502 for (unsigned i
= 0; i
< 4; i
++) {
503 EmitByte(srcSelect
[i
]);
// Emits a flow-control instruction. When the instruction has operands,
// exactly one is expected (asserted) and it is emitted as a source with
// EmitSrc. The switch then selects the flow-control code from the opcode.
507 void R600CodeEmitter::EmitFCInstr(MachineInstr
&MI
)
509 // Emit instruction type
513 unsigned numOperands
= MI
.getNumOperands();
514 if (numOperands
> 0) {
515 assert(numOperands
== 1);
516 EmitSrc(MI
.getOperand(0));
// NOTE(review): presumably the no-operand path, padding the source slot with
// null bytes -- confirm.
518 EmitNullBytes(SRC_BYTE_COUNT
);
521 // Emit FC Instruction
523 switch (MI
.getOpcode()) {
524 case AMDGPU::BREAK_LOGICALZ_f32
:
// The f32 and i32 "break if non-zero" opcodes share FC_BREAK_NZ_INT.
527 case AMDGPU::BREAK_LOGICALNZ_f32
:
528 case AMDGPU::BREAK_LOGICALNZ_i32
:
529 instr
= FC_BREAK_NZ_INT
;
531 case AMDGPU::BREAK_LOGICALZ_i32
:
532 instr
= FC_BREAK_Z_INT
;
534 case AMDGPU::CONTINUE_LOGICALNZ_f32
:
535 case AMDGPU::CONTINUE_LOGICALNZ_i32
:
538 case AMDGPU::IF_LOGICALNZ_f32
:
540 case AMDGPU::IF_LOGICALNZ_i32
:
543 case AMDGPU::IF_LOGICALZ_f32
:
552 case AMDGPU::ENDLOOP
:
555 case AMDGPU::WHILELOOP
:
// Runs byteCount iterations to pad the output stream.
// NOTE(review): presumably each iteration emits a single zero byte -- confirm.
565 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount
)
567 for (unsigned int i
= 0; i
< byteCount
; i
++) {
// Writes the low 8 bits of byte to the output stream; higher bits are
// discarded by the uint8_t cast.
572 void R600CodeEmitter::EmitByte(unsigned int byte
)
574 _OS
.write((uint8_t) byte
& 0xff);
// Writes the low 16 bits of bytes to the output stream, least-significant
// byte first.
576 void R600CodeEmitter::EmitTwoBytes(unsigned int bytes
)
578 _OS
.write((uint8_t) (bytes
& 0xff));
579 _OS
.write((uint8_t) ((bytes
>> 8) & 0xff));
// Writes a 32-bit value to the output stream as four bytes,
// least-significant byte first.
582 void R600CodeEmitter::Emit(uint32_t value
)
584 for (unsigned i
= 0; i
< 4; i
++) {
585 _OS
.write((uint8_t) ((value
>> (8 * i
)) & 0xff));
// Writes a 64-bit value as eight bytes, least-significant byte first, using
// EmitByte for each byte.
589 void R600CodeEmitter::Emit(uint64_t value
)
591 for (unsigned i
= 0; i
< 8; i
++) {
592 EmitByte((value
>> (8 * i
)) & 0xff);
// Translates the LLVM register number regNo into a hardware register index,
// starting from TRI->getHWRegIndex(regNo).
// NOTE(review): registers in R600_CReg32RegClass appear to get an additional
// adjustment -- confirm what it is.
596 unsigned R600CodeEmitter::getHWReg(unsigned regNo
) const
600 HWReg
= TRI
->getHWRegIndex(regNo
);
601 if (AMDGPU::R600_CReg32RegClass
.contains(regNo
)) {
// Returns the encoding of operand MO of instruction MI; the return shown
// yields the hardware register index from getHWReg.
// NOTE(review): non-register operands may be handled on other paths --
// confirm.
607 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr
&MI
,
608 const MachineOperand
&MO
) const
611 return getHWReg(MO
.getReg());
617 #include "AMDGPUGenCodeEmitter.inc"