1 //===-- R600CodeEmitter.cpp - R600 machine code emitter pass ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
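10 // Defines R600CodeEmitter, a MachineFunctionPass (pass name "AMDGPU Machine
// Code Emitter") that walks the instructions of each basic block in a
// MachineFunction and writes a byte-oriented encoding of them to a
// formatted_raw_ostream.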
12 //===----------------------------------------------------------------------===//
15 #include "AMDGPUUtil.h"
16 #include "AMDILCodeEmitter.h"
17 #include "AMDILInstrInfo.h"
18 #include "AMDILMachineFunctionInfo.h"
19 #include "AMDILUtilityFunctions.h"
20 #include "R600RegisterInfo.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/DataTypes.h"
25 #include "llvm/Support/FormattedStream.h"
26 #include "llvm/Target/TargetMachine.h"
30 #define SRC_BYTE_COUNT 11
31 #define DST_BYTE_COUNT 5
37 class R600CodeEmitter
: public MachineFunctionPass
, public AMDILCodeEmitter
{
42 formatted_raw_ostream
&_OS
;
43 const TargetMachine
* TM
;
44 const MachineRegisterInfo
* MRI
;
45 AMDILMachineFunctionInfo
* MFI
;
46 const R600RegisterInfo
* TRI
;
47 bool evergreenEncoding
;
50 unsigned reductionElement
;
53 unsigned section_start
;
57 R600CodeEmitter(formatted_raw_ostream
&OS
) : MachineFunctionPass(ID
),
58 _OS(OS
), TM(NULL
), evergreenEncoding(false), isReduction(false),
61 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
63 bool runOnMachineFunction(MachineFunction
&MF
);
64 virtual uint64_t getMachineOpValue(const MachineInstr
&MI
,
65 const MachineOperand
&MO
) const;
69 void emitALUInstr(MachineInstr
&MI
);
70 void emitSrc(const MachineOperand
& MO
);
71 void emitDst(const MachineOperand
& MO
);
72 void emitALU(MachineInstr
&MI
, unsigned numSrc
);
73 void emitTexInstr(MachineInstr
&MI
);
74 void emitFCInstr(MachineInstr
&MI
);
76 unsigned int getHWInst(const MachineInstr
&MI
);
78 void emitNullBytes(unsigned int byteCount
);
80 void emitByte(unsigned int byte
);
82 void emitTwoBytes(uint32_t bytes
);
84 void emit(uint32_t value
);
85 void emit(uint64_t value
);
87 unsigned getHWReg(unsigned regNo
) const;
89 unsigned getElement(unsigned regNo
);
93 } /* End anonymous namespace */
95 #define WRITE_MASK_X 0x1
96 #define WRITE_MASK_Y 0x2
97 #define WRITE_MASK_Z 0x4
98 #define WRITE_MASK_W 0x8
138 TEXTURE_SHADOW1D_ARRAY
,
139 TEXTURE_SHADOW2D_ARRAY
142 char R600CodeEmitter::ID
= 0;
/* Factory for the code emitter pass: allocates a new R600CodeEmitter bound to
 * the output stream OS and returns it as a FunctionPass pointer.
 * NOTE(review): the object comes from a raw `new`; presumably the pass manager
 * that receives the pointer takes ownership -- confirm at the call site. */
144 FunctionPass
*llvm::createR600CodeEmitterPass(formatted_raw_ostream
&OS
) {
145 return new R600CodeEmitter(OS
);
148 bool R600CodeEmitter::runOnMachineFunction(MachineFunction
&MF
) {
150 TM
= &MF
.getTarget();
151 MRI
= &MF
.getRegInfo();
152 MFI
= MF
.getInfo
<AMDILMachineFunctionInfo
>();
153 TRI
= static_cast<const R600RegisterInfo
*>(TM
->getRegisterInfo());
154 const AMDILSubtarget
&STM
= TM
->getSubtarget
<AMDILSubtarget
>();
155 std::string gpu
= STM
.getDeviceName();
156 if (!gpu
.compare(0,3, "rv7")) {
157 evergreenEncoding
= false;
159 evergreenEncoding
= true;
161 const AMDGPUTargetMachine
*amdtm
=
162 static_cast<const AMDGPUTargetMachine
*>(&MF
.getTarget());
164 if (amdtm
->shouldDumpCode()) {
168 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
170 MachineBasicBlock
&MBB
= *BB
;
171 for (MachineBasicBlock::iterator I
= MBB
.begin(), E
= MBB
.end();
173 MachineInstr
&MI
= *I
;
174 if (MI
.getNumOperands() > 1 && MI
.getOperand(0).isReg() && MI
.getOperand(0).isDead()) {
177 if (isTexOp(MI
.getOpcode())) {
179 } else if (isFCOp(MI
.getOpcode())){
181 } else if (isReductionOp(MI
.getOpcode())) {
184 for (reductionElement
= 0; reductionElement
< 4; reductionElement
++) {
185 isLast
= (reductionElement
== 3);
189 } else if (MI
.getOpcode() == AMDIL::RETURN
) {
192 switch(MI
.getOpcode()) {
193 case AMDIL::RAT_WRITE_CACHELESS_eg
:
195 /* XXX: Support for autoencoding 64-bit instructions was added
196 * in LLVM 3.1. Until we drop support for 3.0, we will use Magic
197 * numbers for the high bits. */
198 uint64_t high
= 0x95c0100000000000;
199 uint64_t inst
= getBinaryCodeForInstr(MI
);
201 /* Set End Of Program bit */
202 /* XXX: Need better check of end of program. EOP should be
203 * encoded in one of the operands of the MI, and it should be
204 * set in a prior pass. */
205 MachineBasicBlock::iterator NextI
= llvm::next(I
);
206 MachineInstr
&NextMI
= *NextI
;
207 if (NextMI
.getOpcode() == AMDIL::RETURN
) {
208 inst
|= (((uint64_t)1) << 53);
210 emitByte(INSTR_NATIVE
);
214 case AMDIL::VTX_READ_eg
:
224 emitByte(MI
.getOperand(2).getImm());
227 emitByte(getHWReg(MI
.getOperand(1).getReg()));
230 emitByte(TRI
->getHWRegChan(MI
.getOperand(1).getReg()));
232 /* mega_fetch_count */
236 emitByte(getHWReg(MI
.getOperand(0).getReg()));
250 /* use_const_fields */
259 /* format_comp_all */
283 void R600CodeEmitter::emitALUInstr(MachineInstr
&MI
)
286 unsigned numOperands
= MI
.getNumOperands();
288 /* Some instructions are just place holder instructions that represent
289 * operations that the GPU does automatically. They should be ignored. */
290 if (isPlaceHolderOpcode(MI
.getOpcode())) {
294 /* We need to handle some opcodes differently */
295 switch (MI
.getOpcode()) {
298 /* Custom swizzle instructions, ignore the last two operands */
299 case AMDIL::SET_CHAN
:
303 case AMDIL::VEXTRACT_v4f32
:
308 case AMDIL::STORE_OUTPUT
:
313 /* XXX Check if instruction writes a result */
314 if (numOperands
< 1) {
317 const MachineOperand dstOp
= MI
.getOperand(0);
319 /* Emit instruction type */
322 unsigned int opIndex
;
323 for (opIndex
= 1; opIndex
< numOperands
; opIndex
++) {
324 /* Literal constants are always stored as the last operand. */
325 if (MI
.getOperand(opIndex
).isImm() || MI
.getOperand(opIndex
).isFPImm()) {
328 emitSrc(MI
.getOperand(opIndex
));
331 /* Emit zeros for unused sources */
332 for ( ; opIndex
< 4; opIndex
++) {
333 emitNullBytes(SRC_BYTE_COUNT
);
338 emitALU(MI
, numOperands
- 1);
341 void R600CodeEmitter::emitSrc(const MachineOperand
& MO
)
344 /* Emit the source select (2 bytes). For GPRs, this is the register index.
345 * For other potential instruction operands, (e.g. constant registers) the
346 * value of the source select is defined in the r600isa docs. */
348 unsigned reg
= MO
.getReg();
349 emitTwoBytes(getHWReg(reg
));
350 if (reg
== AMDIL::ALU_LITERAL_X
) {
351 const MachineInstr
* parent
= MO
.getParent();
352 unsigned immOpIndex
= parent
->getNumOperands() - 1;
353 MachineOperand immOp
= parent
->getOperand(immOpIndex
);
354 if (immOp
.isFPImm()) {
355 value
= immOp
.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
357 assert(immOp
.isImm());
358 value
= immOp
.getImm();
362 /* XXX: Handle other operand types. */
366 /* Emit the source channel (1 byte) */
368 emitByte(reductionElement
);
369 } else if (MO
.isReg()) {
370 const MachineInstr
* parent
= MO
.getParent();
371 /* The source channel for EXTRACT is stored in operand 2. */
372 if (parent
->getOpcode() == AMDIL::VEXTRACT_v4f32
) {
373 emitByte(parent
->getOperand(2).getImm());
375 emitByte(TRI
->getHWRegChan(MO
.getReg()));
381 /* XXX: Emit isNegated (1 byte) */
382 if ((!(MO
.getTargetFlags() & MO_FLAG_ABS
))
383 && (MO
.getTargetFlags() & MO_FLAG_NEG
||
385 (MO
.getReg() == AMDIL::NEG_ONE
|| MO
.getReg() == AMDIL::NEG_HALF
)))){
391 /* Emit isAbsolute (1 byte) */
392 if (MO
.getTargetFlags() & MO_FLAG_ABS
) {
398 /* XXX: Emit relative addressing mode (1 byte) */
401 /* Emit kc_bank, This will be adjusted later by r600_asm */
404 /* Emit the literal value, if applicable (4 bytes). */
409 void R600CodeEmitter::emitDst(const MachineOperand
& MO
)
412 /* Emit the destination register index (1 byte) */
413 emitByte(getHWReg(MO
.getReg()));
415 /* Emit the element of the destination register (1 byte)*/
416 const MachineInstr
* parent
= MO
.getParent();
418 emitByte(reductionElement
);
420 /* The destination element for SET_CHAN is stored in the 3rd operand. */
421 } else if (parent
->getOpcode() == AMDIL::SET_CHAN
) {
422 emitByte(parent
->getOperand(2).getImm());
423 } else if (parent
->getOpcode() == AMDIL::VCREATE_v4f32
) {
426 emitByte(TRI
->getHWRegChan(MO
.getReg()));
429 /* Emit isClamped (1 byte) */
430 if (MO
.getTargetFlags() & MO_FLAG_CLAMP
) {
436 /* Emit writemask (1 byte). */
437 if ((isReduction
&& reductionElement
!= TRI
->getHWRegChan(MO
.getReg()))
438 || MO
.getTargetFlags() & MO_FLAG_MASK
) {
444 /* XXX: Emit relative addressing mode */
447 /* XXX: Handle other operand types. Are there any for destination regs? */
448 emitNullBytes(DST_BYTE_COUNT
);
452 void R600CodeEmitter::emitALU(MachineInstr
&MI
, unsigned numSrc
)
454 /* Emit the instruction (2 bytes) */
455 emitTwoBytes(getHWInst(MI
));
457 /* Emit isLast (for this instruction group) (1 byte) */
463 /* Emit isOp3 (1 byte) */
470 /* XXX: Emit predicate (1 byte) */
473 /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
474 * r600_asm.c sets it. */
477 /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */
480 /* XXX: Emit OMOD (1 byte) Not implemented. */
483 /* XXX: Emit index_mode. I think this is for indirect addressing, so we
484 * don't need to worry about it. */
488 void R600CodeEmitter::emitTexInstr(MachineInstr
&MI
)
491 int64_t sampler
= MI
.getOperand(2).getImm();
492 int64_t textureType
= MI
.getOperand(3).getImm();
493 unsigned opcode
= MI
.getOpcode();
494 unsigned srcSelect
[4] = {0, 1, 2, 3};
496 /* Emit instruction type */
499 /* Emit instruction */
500 emitByte(getHWInst(MI
));
502 /* XXX: Emit resource id r600_shader.c uses sampler + 1. Why? */
503 emitByte(sampler
+ 1 + 1);
505 /* Emit source register */
506 emitByte(getHWReg(MI
.getOperand(1).getReg()));
508 /* XXX: Emit src isRelativeAddress */
511 /* Emit destination register */
512 emitByte(getHWReg(MI
.getOperand(0).getReg()));
514 /* XXX: Emit dst isRelativeAddress */
517 /* XXX: Emit dst select */
523 /* XXX: Emit lod bias */
526 /* XXX: Emit coord types */
527 unsigned coordType
[4] = {1, 1, 1, 1};
529 if (textureType
== TEXTURE_RECT
530 || textureType
== TEXTURE_SHADOWRECT
) {
531 coordType
[ELEMENT_X
] = 0;
532 coordType
[ELEMENT_Y
] = 0;
535 if (textureType
== TEXTURE_1D_ARRAY
536 || textureType
== TEXTURE_SHADOW1D_ARRAY
) {
537 if (opcode
== AMDIL::TEX_SAMPLE_C_L
|| opcode
== AMDIL::TEX_SAMPLE_C_LB
) {
538 coordType
[ELEMENT_Y
] = 0;
540 coordType
[ELEMENT_Z
] = 0;
541 srcSelect
[ELEMENT_Z
] = ELEMENT_Y
;
543 } else if (textureType
== TEXTURE_2D_ARRAY
544 || textureType
== TEXTURE_SHADOW2D_ARRAY
) {
545 coordType
[ELEMENT_Z
] = 0;
548 for (unsigned i
= 0; i
< 4; i
++) {
549 emitByte(coordType
[i
]);
552 /* XXX: Emit offsets */
556 /* There is no OFFSET_W */
558 /* Emit sampler id */
561 /* XXX:Emit source select */
562 if ((textureType
== TEXTURE_SHADOW1D
563 || textureType
== TEXTURE_SHADOW2D
564 || textureType
== TEXTURE_SHADOWRECT
565 || textureType
== TEXTURE_SHADOW1D_ARRAY
)
566 && opcode
!= AMDIL::TEX_SAMPLE_C_L
567 && opcode
!= AMDIL::TEX_SAMPLE_C_LB
) {
568 srcSelect
[ELEMENT_W
] = ELEMENT_Z
;
571 for (unsigned i
= 0; i
< 4; i
++) {
572 emitByte(srcSelect
[i
]);
/* Emit a flow-control (FC) instruction for MI.
 *
 * The instruction may carry at most one operand (enforced by the assert);
 * that operand is written with emitSrc(), and emitNullBytes(SRC_BYTE_COUNT)
 * supplies source-sized padding. The FC opcode is then selected by switching
 * on MI.getOpcode(), e.g. BREAK_LOGICALNZ_i32 selects FC_BREAK_NZ_INT and
 * BREAK_LOGICALZ_i32 selects FC_BREAK_Z_INT.
 * NOTE(review): the assert is compiled out in release builds, so a
 * multi-operand instruction would silently have only operand 0 emitted. */
576 void R600CodeEmitter::emitFCInstr(MachineInstr
&MI
)
578 /* Emit instruction type */
582 unsigned numOperands
= MI
.getNumOperands();
583 if (numOperands
> 0) {
584 assert(numOperands
== 1);
585 emitSrc(MI
.getOperand(0));
587 emitNullBytes(SRC_BYTE_COUNT
);
590 /* Emit FC Instruction */
592 switch (MI
.getOpcode()) {
593 case AMDIL::BREAK_LOGICALZ_f32
:
596 case AMDIL::BREAK_LOGICALNZ_i32
:
597 instr
= FC_BREAK_NZ_INT
;
599 case AMDIL::BREAK_LOGICALZ_i32
:
600 instr
= FC_BREAK_Z_INT
;
602 case AMDIL::CONTINUE_LOGICALNZ_f32
:
605 /* XXX: This assumes that all IFs will be if (x != 0). If we add
606 * optimizations this might not be the case */
607 case AMDIL::IF_LOGICALNZ_f32
:
608 case AMDIL::IF_LOGICALNZ_i32
:
611 case AMDIL::IF_LOGICALZ_f32
:
623 case AMDIL::WHILELOOP
:
/* Helper macros that expand to `case` labels for use inside a switch over
 * AMDIL opcodes. Each label group returns HW_INST2(hw) (HW_INST2 is defined
 * outside this file section).
 *
 * INSTR_FLOAT2_V: vector variants, inst##_v4f32 and inst##_v2f32. */
633 #define INSTR_FLOAT2_V(inst, hw) \
634 case AMDIL:: inst##_v4f32: \
635 case AMDIL:: inst##_v2f32: return HW_INST2(hw);
/* INSTR_FLOAT2_S: scalar variant, inst##_f32. */
637 #define INSTR_FLOAT2_S(inst, hw) \
638 case AMDIL:: inst##_f32: return HW_INST2(hw);
/* INSTR_FLOAT2: both the vector and the scalar variants of `inst`. */
640 #define INSTR_FLOAT2(inst, hw) \
641 INSTR_FLOAT2_V(inst, hw) \
642 INSTR_FLOAT2_S(inst, hw)
/* Return the hardware opcode used to encode MI, chosen by switching on
 * MI.getOpcode(). The pseudo/copy-like opcodes listed first are grouped as
 * consecutive case labels (the XXX note says they should eventually be lowered
 * to MOV before reaching the emitter). The visible result of the function is
 * getBinaryCodeForInstr(MI) -- presumably the generated encoder pulled in from
 * AMDILGenCodeEmitter.inc; confirm. */
644 unsigned int R600CodeEmitter::getHWInst(const MachineInstr
&MI
)
647 /* XXX: Lower these to MOV before the code emitter. */
648 switch (MI
.getOpcode()) {
649 case AMDIL::STORE_OUTPUT
:
650 case AMDIL::VCREATE_v4i32
:
651 case AMDIL::VCREATE_v4f32
:
652 case AMDIL::VEXTRACT_v4f32
:
653 case AMDIL::VINSERT_v4f32
:
654 case AMDIL::LOADCONST_i32
:
655 case AMDIL::LOADCONST_f32
:
656 case AMDIL::MOVE_v4i32
:
657 case AMDIL::SET_CHAN
:
658 /* Instructions to reinterpret bits as ... */
659 case AMDIL::IL_ASINT_f32
:
660 case AMDIL::IL_ASINT_i32
:
661 case AMDIL::IL_ASFLOAT_f32
:
662 case AMDIL::IL_ASFLOAT_i32
:
666 return getBinaryCodeForInstr(MI
);
/* Emit byteCount padding bytes, one per loop iteration.
 * NOTE(review): going by the name, each byte is presumably zero -- confirm
 * against the loop body. Callers in this file pass SRC_BYTE_COUNT or
 * DST_BYTE_COUNT to pad out an unused source/destination slot. */
670 void R600CodeEmitter::emitNullBytes(unsigned int byteCount
)
672 for (unsigned int i
= 0; i
< byteCount
; i
++) {
/* Write a single byte to the output stream _OS.
 * Only the low 8 bits of `byte` are emitted. Note that the cast binds tighter
 * than `&`, so the expression is ((uint8_t) byte) & 0xff: the cast already
 * truncates and the mask is redundant (but harmless). */
677 void R600CodeEmitter::emitByte(unsigned int byte
)
679 _OS
.write((uint8_t) byte
& 0xff);
/* Write the low 16 bits of `bytes` to _OS, least-significant byte first
 * (bits 0-7, then bits 8-15). Higher bits are discarded.
 * NOTE(review): the in-class declaration spells the parameter type uint32_t
 * while this definition uses unsigned int; the two only match on platforms
 * where uint32_t is a typedef for unsigned int. */
681 void R600CodeEmitter::emitTwoBytes(unsigned int bytes
)
683 _OS
.write((uint8_t) (bytes
& 0xff));
684 _OS
.write((uint8_t) ((bytes
>> 8) & 0xff));
/* Write a 32-bit value to _OS as four bytes, least-significant byte first:
 * iteration i emits bits [8*i, 8*i+7]. */
687 void R600CodeEmitter::emit(uint32_t value
)
689 for (unsigned i
= 0; i
< 4; i
++) {
690 _OS
.write((uint8_t) ((value
>> (8 * i
)) & 0xff));
/* Write a 64-bit value as eight bytes, least-significant byte first:
 * iteration i emits bits [8*i, 8*i+7]. Unlike the 32-bit overload, each byte
 * is routed through emitByte() rather than written to _OS directly. */
694 void R600CodeEmitter::emit(uint64_t value
)
696 for (unsigned i
= 0; i
< 8; i
++) {
697 emitByte((value
>> (8 * i
)) & 0xff);
/* Translate the LLVM register number regNo into the value used to encode that
 * register. The base value is TRI->getHWRegIndex(regNo).
 * NOTE(review): registers belonging to AMDIL::R600_CReg32RegClass take a
 * separate branch; presumably constant registers need their index adjusted
 * (e.g. offset into a different select range) -- confirm. */
701 unsigned R600CodeEmitter::getHWReg(unsigned regNo
) const
705 hwReg
= TRI
->getHWRegIndex(regNo
);
706 if (AMDIL::R600_CReg32RegClass
.contains(regNo
)) {
/* Return the value used to encode operand MO of instruction MI (declared
 * virtual in the class). The operand's register is resolved through
 * getHWReg(), so the result is the same encoding getHWReg() gives for
 * MO.getReg(). */
712 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr
&MI
,
713 const MachineOperand
&MO
) const
716 return getHWReg(MO
.getReg());
/* Map a single write-mask bit to the register element it selects:
 * WRITE_MASK_X -> ELEMENT_X, WRITE_MASK_Y -> ELEMENT_Y,
 * WRITE_MASK_Z -> ELEMENT_Z, WRITE_MASK_W -> ELEMENT_W.
 * maskBit is expected to be exactly one of those four values. */
723 RegElement
maskBitToElement(unsigned int maskBit
)
726 case WRITE_MASK_X
: return ELEMENT_X
;
727 case WRITE_MASK_Y
: return ELEMENT_Y
;
728 case WRITE_MASK_Z
: return ELEMENT_Z
;
729 case WRITE_MASK_W
: return ELEMENT_W
;
/* NOTE(review): this assert can never fire. A string literal converts to a
 * non-null pointer, so the condition is always true. The usual spelling for
 * an unconditional failure is assert(0 && "Invalid maskBit") or
 * assert(!"Invalid maskBit"). */
731 assert("Invalid maskBit");
/* Convert an AMDIL destination swizzle enumerator into a write mask built from
 * the WRITE_MASK_X/Y/Z/W bits, one bit per channel that is written.
 *
 * The enumerator names spell out the channels: a letter means the channel is
 * written and '_' means it is not (e.g. AMDIL_DST_SWIZZLE_XY_W yields
 * WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W). AMDIL_DST_SWIZZLE_DEFAULT
 * yields all four bits, the same as AMDIL_DST_SWIZZLE_XYZW. */
736 unsigned int dstSwizzleToWriteMask(unsigned swizzle
)
740 case AMDIL_DST_SWIZZLE_DEFAULT
:
741 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
742 case AMDIL_DST_SWIZZLE_X___
:
744 case AMDIL_DST_SWIZZLE_XY__
:
745 return WRITE_MASK_X
| WRITE_MASK_Y
;
746 case AMDIL_DST_SWIZZLE_XYZ_
:
747 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
;
748 case AMDIL_DST_SWIZZLE_XYZW
:
749 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
750 case AMDIL_DST_SWIZZLE__Y__
:
752 case AMDIL_DST_SWIZZLE__YZ_
:
753 return WRITE_MASK_Y
| WRITE_MASK_Z
;
754 case AMDIL_DST_SWIZZLE__YZW
:
755 return WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
756 case AMDIL_DST_SWIZZLE___Z_
:
758 case AMDIL_DST_SWIZZLE___ZW
:
759 return WRITE_MASK_Z
| WRITE_MASK_W
;
760 case AMDIL_DST_SWIZZLE____W
:
762 case AMDIL_DST_SWIZZLE_X_ZW
:
763 return WRITE_MASK_X
| WRITE_MASK_Z
| WRITE_MASK_W
;
764 case AMDIL_DST_SWIZZLE_XY_W
:
765 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_W
;
766 case AMDIL_DST_SWIZZLE_X_Z_
:
767 return WRITE_MASK_X
| WRITE_MASK_Z
;
768 case AMDIL_DST_SWIZZLE_X__W
:
769 return WRITE_MASK_X
| WRITE_MASK_W
;
770 case AMDIL_DST_SWIZZLE__Y_W
:
771 return WRITE_MASK_Y
| WRITE_MASK_W
;
775 #include "AMDILGenCodeEmitter.inc"