1 //===-- R600CodeEmitter.cpp - R600 machine code emitter pass ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Machine function pass that serializes machine instructions into a byte stream.
12 //===----------------------------------------------------------------------===//
15 #include "AMDGPUUtil.h"
16 #include "AMDILCodeEmitter.h"
17 #include "AMDILInstrInfo.h"
18 #include "AMDILUtilityFunctions.h"
19 #include "R600RegisterInfo.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/Support/DataTypes.h"
24 #include "llvm/Support/FormattedStream.h"
25 #include "llvm/Target/TargetMachine.h"
/* Number of zero bytes emitted for each unused source slot (see the
 * emitNullBytes(SRC_BYTE_COUNT) calls in emitALUInstr and emitFCInstr).
 * Presumably the size of one encoded source operand -- TODO confirm. */
29 #define SRC_BYTE_COUNT 11
/* Number of zero bytes emitDst writes through emitNullBytes.
 * Presumably the size of one encoded destination operand -- TODO confirm. */
30 #define DST_BYTE_COUNT 5
/* Machine function pass that writes an encoded byte stream for the machine
 * instructions of a function to the output stream given at construction. */
36 class R600CodeEmitter
: public MachineFunctionPass
, public AMDILCodeEmitter
{
/* Stream that receives the emitted bytes (written by emitByte, emitTwoBytes
 * and emit(uint32_t)). */
41 formatted_raw_ostream
&_OS
;
/* TM, MRI and TRI are (re)assigned at the start of runOnMachineFunction. */
42 const TargetMachine
* TM
;
43 const MachineRegisterInfo
* MRI
;
44 const R600RegisterInfo
* TRI
;
/* Chosen in runOnMachineFunction from the device name: false when the name
 * begins with "rv7", presumably true otherwise. */
45 bool evergreenEncoding
;
/* Loop counter (0..3) driven by runOnMachineFunction while it handles a
 * reduction opcode; emitSrc and emitDst emit it as a channel byte. */
48 unsigned reductionElement
;
51 unsigned section_start
;
/* Binds the output stream OS; TM starts out NULL until
 * runOnMachineFunction assigns it. */
55 R600CodeEmitter(formatted_raw_ostream
&OS
) : MachineFunctionPass(ID
),
56 _OS(OS
), TM(NULL
), evergreenEncoding(false), isReduction(false),
59 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
/* Pass entry point: walks every instruction of MF and dispatches on opcode. */
61 bool runOnMachineFunction(MachineFunction
&MF
);
62 virtual uint64_t getMachineOpValue(const MachineInstr
&MI
,
63 const MachineOperand
&MO
) const;
/* Per-instruction-kind emitters and their operand helpers. */
67 void emitALUInstr(MachineInstr
&MI
);
68 void emitSrc(const MachineOperand
& MO
);
69 void emitDst(const MachineOperand
& MO
);
70 void emitALU(MachineInstr
&MI
, unsigned numSrc
);
71 void emitTexInstr(MachineInstr
&MI
);
72 void emitFCInstr(MachineInstr
&MI
);
74 unsigned int getHWInst(const MachineInstr
&MI
);
/* Low-level byte writers. */
76 void emitNullBytes(unsigned int byteCount
);
78 void emitByte(unsigned int byte
);
80 void emitTwoBytes(uint32_t bytes
);
82 void emit(uint32_t value
);
83 void emit(uint64_t value
);
85 unsigned getHWReg(unsigned regNo
) const;
87 unsigned getElement(unsigned regNo
);
91 } /* End anonymous namespace */
/* One bit per destination channel (X, Y, Z, W). dstSwizzleToWriteMask ORs
 * these together and maskBitToElement maps a single bit to an ELEMENT_*. */
93 #define WRITE_MASK_X 0x1
94 #define WRITE_MASK_Y 0x2
95 #define WRITE_MASK_Z 0x4
96 #define WRITE_MASK_W 0x8
136 TEXTURE_SHADOW1D_ARRAY
,
137 TEXTURE_SHADOW2D_ARRAY
/* Static pass ID; the constructor hands it to the MachineFunctionPass base. */
140 char R600CodeEmitter::ID
= 0;
/* Factory: returns a newly allocated R600CodeEmitter that writes to OS.
 * The object is created with new; ownership passes to the caller. */
142 FunctionPass
*llvm::createR600CodeEmitterPass(formatted_raw_ostream
&OS
) {
143 return new R600CodeEmitter(OS
);
/* Pass entry point. Caches the target machine, register info and R600
 * register info for MF, selects the encoding from the device name, then
 * iterates over every instruction of every basic block and dispatches on the
 * opcode: texture ops, flow-control ops, reduction ops (handled once per
 * element 0..3), RETURN/BUNDLE/KILL, and a switch over the remaining opcodes
 * including RAT_WRITE_CACHELESS_eg and VTX_READ_eg. */
146 bool R600CodeEmitter::runOnMachineFunction(MachineFunction
&MF
) {
148 TM
= &MF
.getTarget();
149 MRI
= &MF
.getRegInfo();
150 TRI
= static_cast<const R600RegisterInfo
*>(TM
->getRegisterInfo());
151 const AMDILSubtarget
&STM
= TM
->getSubtarget
<AMDILSubtarget
>();
152 std::string gpu
= STM
.getDeviceName();
/* compare() returns 0 on a match, so this branch is taken when the device
 * name begins with "rv7". */
153 if (!gpu
.compare(0,3, "rv7")) {
154 evergreenEncoding
= false;
156 evergreenEncoding
= true;
158 const AMDGPUTargetMachine
*amdtm
=
159 static_cast<const AMDGPUTargetMachine
*>(&MF
.getTarget());
161 if (amdtm
->shouldDumpCode()) {
165 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
167 MachineBasicBlock
&MBB
= *BB
;
168 for (MachineBasicBlock::iterator I
= MBB
.begin(), E
= MBB
.end();
170 MachineInstr
&MI
= *I
;
/* True for an instruction with more than one operand whose first operand is
 * a register marked dead. */
171 if (MI
.getNumOperands() > 1 && MI
.getOperand(0).isReg() && MI
.getOperand(0).isDead()) {
174 if (isTexOp(MI
.getOpcode())) {
176 } else if (isFCOp(MI
.getOpcode())){
178 } else if (isReductionOp(MI
.getOpcode())) {
/* One pass per element; isLast is set only on the fourth (index 3). */
181 for (reductionElement
= 0; reductionElement
< 4; reductionElement
++) {
182 isLast
= (reductionElement
== 3);
186 } else if (MI
.getOpcode() == AMDIL::RETURN
||
187 MI
.getOpcode() == AMDIL::BUNDLE
||
188 MI
.getOpcode() == AMDIL::KILL
) {
191 switch(MI
.getOpcode()) {
192 case AMDIL::RAT_WRITE_CACHELESS_eg
:
194 /* XXX: Support for autoencoding 64-bit instructions was added
195 * in LLVM 3.1. Until we drop support for 3.0, we will use Magic
196 * numbers for the high bits. */
197 uint64_t high
= 0x95c0100000000000;
198 uint64_t inst
= getBinaryCodeForInstr(MI
);
200 /* Set End Of Program bit */
201 /* XXX: Need better check of end of program. EOP should be
202 * encoded in one of the operands of the MI, and it should be
203 * set in a prior pass. */
/* NOTE(review): NextI is dereferenced without a visible comparison against
 * the block's end iterator E -- confirm this opcode is never the last
 * instruction of a block. */
204 MachineBasicBlock::iterator NextI
= llvm::next(I
);
205 MachineInstr
&NextMI
= *NextI
;
206 if (NextMI
.getOpcode() == AMDIL::RETURN
) {
/* Bit 53 of the 64-bit instruction word is the End Of Program bit. */
207 inst
|= (((uint64_t)1) << 53);
209 emitByte(INSTR_NATIVE
);
213 case AMDIL::VTX_READ_eg
:
223 emitByte(MI
.getOperand(2).getImm());
226 emitByte(getHWReg(MI
.getOperand(1).getReg()));
229 emitByte(TRI
->getHWRegChan(MI
.getOperand(1).getReg()));
231 /* mega_fetch_count */
235 emitByte(getHWReg(MI
.getOperand(0).getReg()));
249 /* use_const_fields */
258 /* format_comp_all */
/* Emits one ALU instruction. Operand 0 is treated as the destination and
 * operands 1..numOperands-1 as sources; a trailing immediate operand is a
 * literal constant rather than a source. Source slots up to index 3 that are
 * not used are filled with SRC_BYTE_COUNT zero bytes each, and emitALU is
 * called last with the source count (numOperands - 1). */
282 void R600CodeEmitter::emitALUInstr(MachineInstr
&MI
)
285 unsigned numOperands
= MI
.getNumExplicitOperands();
287 /* Some instructions are just place holder instructions that represent
288 * operations that the GPU does automatically. They should be ignored. */
289 if (isPlaceHolderOpcode(MI
.getOpcode())) {
293 /* We need to handle some opcodes differently */
294 switch (MI
.getOpcode()) {
298 case AMDIL::STORE_OUTPUT
:
303 /* XXX Check if instruction writes a result */
304 if (numOperands
< 1) {
307 const MachineOperand dstOp
= MI
.getOperand(0);
309 /* Emit instruction type */
312 unsigned int opIndex
;
313 for (opIndex
= 1; opIndex
< numOperands
; opIndex
++) {
314 /* Literal constants are always stored as the last operand. */
315 if (MI
.getOperand(opIndex
).isImm() || MI
.getOperand(opIndex
).isFPImm()) {
318 emitSrc(MI
.getOperand(opIndex
));
321 /* Emit zeros for unused sources */
/* opIndex carries over from the loop above, so only the remaining slots
 * (up to index 3) are padded. */
322 for ( ; opIndex
< 4; opIndex
++) {
323 emitNullBytes(SRC_BYTE_COUNT
);
328 emitALU(MI
, numOperands
- 1);
/* Emits the encoded fields of one source operand MO, in the order given by
 * the comments below: source select (2 bytes), channel, isNegated,
 * isAbsolute, relative addressing mode, kc_bank and the 4-byte literal. */
331 void R600CodeEmitter::emitSrc(const MachineOperand
& MO
)
334 /* Emit the source select (2 bytes). For GPRs, this is the register index.
335 * For other potential instruction operands, (e.g. constant registers) the
336 * value of the source select is defined in the r600isa docs. */
338 unsigned reg
= MO
.getReg();
339 emitTwoBytes(getHWReg(reg
));
/* For an ALU_LITERAL_X source the literal is read from the parent
 * instruction's last explicit operand: the raw bit pattern of an FP
 * immediate, or the integer immediate otherwise. */
340 if (reg
== AMDIL::ALU_LITERAL_X
) {
341 const MachineInstr
* parent
= MO
.getParent();
342 unsigned immOpIndex
= parent
->getNumExplicitOperands() - 1;
343 MachineOperand immOp
= parent
->getOperand(immOpIndex
);
344 if (immOp
.isFPImm()) {
345 value
= immOp
.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
347 assert(immOp
.isImm());
348 value
= immOp
.getImm();
352 /* XXX: Handle other operand types. */
356 /* Emit the source channel (1 byte) */
358 emitByte(reductionElement
);
359 } else if (MO
.isReg()) {
360 emitByte(TRI
->getHWRegChan(MO
.getReg()));
365 /* XXX: Emit isNegated (1 byte) */
/* Condition: the ABS flag is clear and either the NEG flag is set or the
 * register is NEG_ONE / NEG_HALF. */
366 if ((!(MO
.getTargetFlags() & MO_FLAG_ABS
))
367 && (MO
.getTargetFlags() & MO_FLAG_NEG
||
369 (MO
.getReg() == AMDIL::NEG_ONE
|| MO
.getReg() == AMDIL::NEG_HALF
)))){
375 /* Emit isAbsolute (1 byte) */
376 if (MO
.getTargetFlags() & MO_FLAG_ABS
) {
382 /* XXX: Emit relative addressing mode (1 byte) */
385 /* Emit kc_bank. This will be adjusted later by r600_asm. */
388 /* Emit the literal value, if applicable (4 bytes). */
/* Emits the encoded fields of the destination operand MO: hardware register
 * index, channel (reductionElement or the register's hardware channel),
 * isClamped and the write mask. The trailing emitNullBytes(DST_BYTE_COUNT)
 * sits under the "other operand types" note; presumably it is the fallback
 * for operands that are not registers -- TODO confirm. */
393 void R600CodeEmitter::emitDst(const MachineOperand
& MO
)
396 /* Emit the destination register index (1 byte) */
397 emitByte(getHWReg(MO
.getReg()));
399 /* Emit the element of the destination register (1 byte) */
401 emitByte(reductionElement
);
403 emitByte(TRI
->getHWRegChan(MO
.getReg()));
406 /* Emit isClamped (1 byte) */
407 if (MO
.getTargetFlags() & MO_FLAG_CLAMP
) {
413 /* Emit writemask (1 byte). */
/* Condition: a reduction is in progress and the current reduction element
 * differs from the destination's hardware channel, or the MASK flag is set. */
414 if ((isReduction
&& reductionElement
!= TRI
->getHWRegChan(MO
.getReg()))
415 || MO
.getTargetFlags() & MO_FLAG_MASK
) {
421 /* XXX: Emit relative addressing mode */
424 /* XXX: Handle other operand types. Are there any for destination regs? */
425 emitNullBytes(DST_BYTE_COUNT
);
/* Emits the instruction-level ALU fields, starting with the two-byte
 * hardware opcode from getHWInst(MI). emitALUInstr calls this after the
 * operands, passing the number of sources as numSrc; presumably numSrc
 * feeds the isOp3 field -- TODO confirm. */
429 void R600CodeEmitter::emitALU(MachineInstr
&MI
, unsigned numSrc
)
431 /* Emit the instruction (2 bytes) */
432 emitTwoBytes(getHWInst(MI
));
434 /* Emit isLast (for this instruction group) (1 byte) */
440 /* Emit isOp3 (1 byte) */
447 /* XXX: Emit predicate (1 byte) */
450 /* XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
451 * r600_asm.c sets it. */
454 /* XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. */
457 /* XXX: Emit OMOD (1 byte) Not implemented. */
460 /* XXX: Emit index_mode. I think this is for indirect addressing, so we
461 * don't need to worry about it. */
/* Emits one texture instruction. Operand 0 is the destination register,
 * operand 1 the source register, operand 2 the sampler immediate and
 * operand 3 the texture type immediate. Four coordinate-type bytes and four
 * source-select bytes are emitted after being adjusted for the texture type
 * and opcode. */
465 void R600CodeEmitter::emitTexInstr(MachineInstr
&MI
)
468 int64_t sampler
= MI
.getOperand(2).getImm();
469 int64_t textureType
= MI
.getOperand(3).getImm();
470 unsigned opcode
= MI
.getOpcode();
/* Default source select: identity mapping of the four channels. */
471 unsigned srcSelect
[4] = {0, 1, 2, 3};
473 /* Emit instruction type */
476 /* Emit instruction */
477 emitByte(getHWInst(MI
));
479 /* XXX: Emit resource id r600_shader.c uses sampler + 1. Why? */
/* NOTE(review): the code emits sampler + 2, while the comment above refers
 * to sampler + 1 -- confirm the intended offset. */
480 emitByte(sampler
+ 1 + 1);
482 /* Emit source register */
483 emitByte(getHWReg(MI
.getOperand(1).getReg()));
485 /* XXX: Emit src isRelativeAddress */
488 /* Emit destination register */
489 emitByte(getHWReg(MI
.getOperand(0).getReg()));
491 /* XXX: Emit dst isRelativeAddress */
494 /* XXX: Emit dst select */
500 /* XXX: Emit lod bias */
503 /* XXX: Emit coord types */
/* Every coordinate type defaults to 1; selected entries are cleared to 0
 * below depending on the texture type. */
504 unsigned coordType
[4] = {1, 1, 1, 1};
506 if (textureType
== TEXTURE_RECT
507 || textureType
== TEXTURE_SHADOWRECT
) {
508 coordType
[ELEMENT_X
] = 0;
509 coordType
[ELEMENT_Y
] = 0;
512 if (textureType
== TEXTURE_1D_ARRAY
513 || textureType
== TEXTURE_SHADOW1D_ARRAY
) {
514 if (opcode
== AMDIL::TEX_SAMPLE_C_L
|| opcode
== AMDIL::TEX_SAMPLE_C_LB
) {
515 coordType
[ELEMENT_Y
] = 0;
517 coordType
[ELEMENT_Z
] = 0;
518 srcSelect
[ELEMENT_Z
] = ELEMENT_Y
;
520 } else if (textureType
== TEXTURE_2D_ARRAY
521 || textureType
== TEXTURE_SHADOW2D_ARRAY
) {
522 coordType
[ELEMENT_Z
] = 0;
525 for (unsigned i
= 0; i
< 4; i
++) {
526 emitByte(coordType
[i
]);
529 /* XXX: Emit offsets */
533 /* There is no OFFSET_W */
535 /* Emit sampler id */
538 /* XXX: Emit source select */
/* For these shadow texture types, unless the opcode is TEX_SAMPLE_C_L or
 * TEX_SAMPLE_C_LB, the W source select is redirected to the Z channel. */
539 if ((textureType
== TEXTURE_SHADOW1D
540 || textureType
== TEXTURE_SHADOW2D
541 || textureType
== TEXTURE_SHADOWRECT
542 || textureType
== TEXTURE_SHADOW1D_ARRAY
)
543 && opcode
!= AMDIL::TEX_SAMPLE_C_L
544 && opcode
!= AMDIL::TEX_SAMPLE_C_LB
) {
545 srcSelect
[ELEMENT_W
] = ELEMENT_Z
;
548 for (unsigned i
= 0; i
< 4; i
++) {
549 emitByte(srcSelect
[i
]);
/* Emits one flow-control instruction. When MI has an operand (exactly one
 * is expected, enforced by the assert) it is emitted as a source; the
 * emitNullBytes(SRC_BYTE_COUNT) call presumably covers the no-operand case
 * -- TODO confirm. The switch then maps the AMDIL opcode to an FC_* code
 * stored in instr. */
553 void R600CodeEmitter::emitFCInstr(MachineInstr
&MI
)
555 /* Emit instruction type */
559 unsigned numOperands
= MI
.getNumOperands();
560 if (numOperands
> 0) {
561 assert(numOperands
== 1);
562 emitSrc(MI
.getOperand(0));
564 emitNullBytes(SRC_BYTE_COUNT
);
567 /* Emit FC Instruction */
569 switch (MI
.getOpcode()) {
570 case AMDIL::BREAK_LOGICALZ_f32
:
573 case AMDIL::BREAK_LOGICALNZ_i32
:
574 instr
= FC_BREAK_NZ_INT
;
576 case AMDIL::BREAK_LOGICALZ_i32
:
577 instr
= FC_BREAK_Z_INT
;
579 case AMDIL::CONTINUE_LOGICALNZ_f32
:
582 /* XXX: This assumes that all IFs will be if (x != 0). If we add
583 * optimizations this might not be the case */
584 case AMDIL::IF_LOGICALNZ_f32
:
585 case AMDIL::IF_LOGICALNZ_i32
:
588 case AMDIL::IF_LOGICALZ_f32
:
600 case AMDIL::WHILELOOP
:
/* Helper macros that expand to switch case labels returning HW_INST2(hw):
 * INSTR_FLOAT2_V covers the _v4f32 and _v2f32 variants of inst,
 * INSTR_FLOAT2_S covers the _f32 variant, and INSTR_FLOAT2 covers all three. */
610 #define INSTR_FLOAT2_V(inst, hw) \
611 case AMDIL:: inst##_v4f32: \
612 case AMDIL:: inst##_v2f32: return HW_INST2(hw);
614 #define INSTR_FLOAT2_S(inst, hw) \
615 case AMDIL:: inst##_f32: return HW_INST2(hw);
617 #define INSTR_FLOAT2(inst, hw) \
618 INSTR_FLOAT2_V(inst, hw) \
619 INSTR_FLOAT2_S(inst, hw)
/* Returns the hardware instruction code for MI. The opcodes listed in the
 * switch are special-cased (per the XXX note they should eventually be
 * lowered to MOV before this pass); presumably every other opcode reaches
 * the getBinaryCodeForInstr(MI) return -- TODO confirm. */
621 unsigned int R600CodeEmitter::getHWInst(const MachineInstr
&MI
)
624 /* XXX: Lower these to MOV before the code emitter. */
625 switch (MI
.getOpcode()) {
626 case AMDIL::STORE_OUTPUT
:
627 case AMDIL::VCREATE_v4i32
:
628 case AMDIL::LOADCONST_i32
:
629 case AMDIL::LOADCONST_f32
:
630 case AMDIL::MOVE_v4i32
:
631 /* Instructions to reinterpret bits as ... */
632 case AMDIL::IL_ASINT_f32
:
633 case AMDIL::IL_ASINT_i32
:
634 case AMDIL::IL_ASFLOAT_f32
:
635 case AMDIL::IL_ASFLOAT_i32
:
639 return getBinaryCodeForInstr(MI
);
/* Runs its loop byteCount times; callers use it to "emit zeros", so each
 * iteration presumably writes one zero byte -- TODO confirm against the
 * loop body. */
643 void R600CodeEmitter::emitNullBytes(unsigned int byteCount
)
645 for (unsigned int i
= 0; i
< byteCount
; i
++) {
/* Writes the low 8 bits of byte to the output stream. The cast binds more
 * tightly than '&', so the value is already truncated to uint8_t before the
 * 0xff mask is applied. */
650 void R600CodeEmitter::emitByte(unsigned int byte
)
652 _OS
.write((uint8_t) byte
& 0xff);
/* Writes the low 16 bits of bytes to the output stream, low byte first.
 * NOTE(review): the in-class declaration spells the parameter type as
 * uint32_t while this definition uses unsigned int; the two match only
 * where uint32_t is a typedef of unsigned int. */
654 void R600CodeEmitter::emitTwoBytes(unsigned int bytes
)
656 _OS
.write((uint8_t) (bytes
& 0xff));
657 _OS
.write((uint8_t) ((bytes
>> 8) & 0xff));
/* Writes value to the output stream as four bytes, least-significant byte
 * first. */
660 void R600CodeEmitter::emit(uint32_t value
)
662 for (unsigned i
= 0; i
< 4; i
++) {
663 _OS
.write((uint8_t) ((value
>> (8 * i
)) & 0xff));
/* Emits value as eight bytes through emitByte, least-significant byte
 * first. */
667 void R600CodeEmitter::emit(uint64_t value
)
669 for (unsigned i
= 0; i
< 8; i
++) {
670 emitByte((value
>> (8 * i
)) & 0xff);
/* Computes the hardware register number for regNo, starting from
 * TRI->getHWRegIndex(regNo). Registers belonging to R600_CReg32RegClass
 * take an extra branch; presumably an adjustment to hwReg -- TODO confirm. */
674 unsigned R600CodeEmitter::getHWReg(unsigned regNo
) const
678 hwReg
= TRI
->getHWRegIndex(regNo
);
679 if (AMDIL::R600_CReg32RegClass
.contains(regNo
)) {
/* Returns the encoded value of operand MO of MI. On the path shown, the
 * value is the hardware register number from getHWReg(MO.getReg()). */
685 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr
&MI
,
686 const MachineOperand
&MO
) const
689 return getHWReg(MO
.getReg());
/* Maps a single WRITE_MASK_* bit to the corresponding ELEMENT_* value
 * (X, Y, Z or W). */
696 RegElement
maskBitToElement(unsigned int maskBit
)
699 case WRITE_MASK_X
: return ELEMENT_X
;
700 case WRITE_MASK_Y
: return ELEMENT_Y
;
701 case WRITE_MASK_Z
: return ELEMENT_Z
;
702 case WRITE_MASK_W
: return ELEMENT_W
;
/* NOTE(review): a string literal converts to a non-null pointer, so this
 * assert always passes and never reports an invalid maskBit. The intended
 * form is likely assert(!"Invalid maskBit") or
 * assert(0 && "Invalid maskBit"). */
704 assert("Invalid maskBit");
/* Converts an AMDIL_DST_SWIZZLE_* constant into the bitwise OR of the
 * WRITE_MASK_* bits for the channels named in the swizzle. The DEFAULT and
 * XYZW swizzles both yield all four bits. */
709 unsigned int dstSwizzleToWriteMask(unsigned swizzle
)
713 case AMDIL_DST_SWIZZLE_DEFAULT
:
714 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
715 case AMDIL_DST_SWIZZLE_X___
:
717 case AMDIL_DST_SWIZZLE_XY__
:
718 return WRITE_MASK_X
| WRITE_MASK_Y
;
719 case AMDIL_DST_SWIZZLE_XYZ_
:
720 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
;
721 case AMDIL_DST_SWIZZLE_XYZW
:
722 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
723 case AMDIL_DST_SWIZZLE__Y__
:
725 case AMDIL_DST_SWIZZLE__YZ_
:
726 return WRITE_MASK_Y
| WRITE_MASK_Z
;
727 case AMDIL_DST_SWIZZLE__YZW
:
728 return WRITE_MASK_Y
| WRITE_MASK_Z
| WRITE_MASK_W
;
729 case AMDIL_DST_SWIZZLE___Z_
:
731 case AMDIL_DST_SWIZZLE___ZW
:
732 return WRITE_MASK_Z
| WRITE_MASK_W
;
733 case AMDIL_DST_SWIZZLE____W
:
735 case AMDIL_DST_SWIZZLE_X_ZW
:
736 return WRITE_MASK_X
| WRITE_MASK_Z
| WRITE_MASK_W
;
737 case AMDIL_DST_SWIZZLE_XY_W
:
738 return WRITE_MASK_X
| WRITE_MASK_Y
| WRITE_MASK_W
;
739 case AMDIL_DST_SWIZZLE_X_Z_
:
740 return WRITE_MASK_X
| WRITE_MASK_Z
;
741 case AMDIL_DST_SWIZZLE_X__W
:
742 return WRITE_MASK_X
| WRITE_MASK_W
;
743 case AMDIL_DST_SWIZZLE__Y_W
:
744 return WRITE_MASK_Y
| WRITE_MASK_W
;
748 #include "AMDILGenCodeEmitter.inc"