14e877b2518ad85b2f9c069526aaeeada8ebec3d
[mesa.git] / src / gallium / drivers / radeon / R600CodeEmitter.cpp
1 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code emitter outputs bytecode that is understood by the r600g driver
11 // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12 // except that the sizes of the instruction fields are rounded up to the
13 // nearest byte.
14 //
15 // [1] http://www.mesa3d.org/
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "AMDGPU.h"
20 #include "AMDGPUCodeEmitter.h"
21 #include "AMDGPUInstrInfo.h"
22 #include "AMDILUtilityFunctions.h"
23 #include "R600InstrInfo.h"
24 #include "R600RegisterInfo.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/Support/DataTypes.h"
29 #include "llvm/Support/FormattedStream.h"
30 #include "llvm/Target/TargetMachine.h"
31
32 #include <stdio.h>
33
34 #define SRC_BYTE_COUNT 11
35 #define DST_BYTE_COUNT 5
36
37 using namespace llvm;
38
39 namespace {
40
41 class R600CodeEmitter : public MachineFunctionPass, public AMDGPUCodeEmitter {
42
43 private:
44
45 static char ID;
46 formatted_raw_ostream &_OS;
47 const TargetMachine * TM;
48 const MachineRegisterInfo * MRI;
49 const R600RegisterInfo * TRI;
50 const R600InstrInfo * TII;
51
52 bool IsCube;
53 bool IsVector;
54 unsigned currentElement;
55 bool IsLast;
56
57 unsigned section_start;
58
59 public:
60
61 R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
62 _OS(OS), TM(NULL), IsCube(false), IsVector(false),
63 IsLast(true) { }
64
65 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
66
67 bool runOnMachineFunction(MachineFunction &MF);
68 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
69 const MachineOperand &MO) const;
70
71 private:
72
73 void EmitALUInstr(MachineInstr &MI);
74 void EmitSrc(const MachineOperand & MO, int chan_override = -1);
75 void EmitDst(const MachineOperand & MO);
76 void EmitALU(MachineInstr &MI, unsigned numSrc);
77 void EmitTexInstr(MachineInstr &MI);
78 void EmitFCInstr(MachineInstr &MI);
79
80 void EmitNullBytes(unsigned int byteCount);
81
82 void EmitByte(unsigned int byte);
83
84 void EmitTwoBytes(uint32_t bytes);
85
86 void Emit(uint32_t value);
87 void Emit(uint64_t value);
88
89 unsigned getHWReg(unsigned regNo) const;
90
91 };
92
93 } // End anonymous namespace
94
// Channel indices of a four-component register; used to index the
// per-channel tables built in EmitTexInstr().
enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y = 1,
  ELEMENT_Z = 2,
  ELEMENT_W = 3
};
101
// Tag written as the first byte of each emitted instruction to identify
// which kind of record follows.
enum InstrTypes {
  INSTR_ALU    = 0,
  INSTR_TEX    = 1,
  INSTR_FC     = 2,
  INSTR_NATIVE = 3,
  INSTR_VTX    = 4
};
109
// Flow-control opcodes written by EmitFCInstr() as the last byte of a
// flow-control record.
enum FCInstr {
  FC_IF           = 0,
  FC_IF_INT       = 1,
  FC_ELSE         = 2,
  FC_ENDIF        = 3,
  FC_BGNLOOP      = 4,
  FC_ENDLOOP      = 5,
  FC_BREAK        = 6,
  FC_BREAK_NZ_INT = 7,
  FC_CONTINUE     = 8,
  FC_BREAK_Z_INT  = 9,
  FC_BREAK_NZ     = 10
};
123
// Texture target identifiers; EmitTexInstr() compares the texture-type
// immediate operand of a texture instruction against these. Numbering
// starts at 1.
enum TextureTypes {
  TEXTURE_1D             = 1,
  TEXTURE_2D             = 2,
  TEXTURE_3D             = 3,
  TEXTURE_CUBE           = 4,
  TEXTURE_RECT           = 5,
  TEXTURE_SHADOW1D       = 6,
  TEXTURE_SHADOW2D       = 7,
  TEXTURE_SHADOWRECT     = 8,
  TEXTURE_1D_ARRAY       = 9,
  TEXTURE_2D_ARRAY       = 10,
  TEXTURE_SHADOW1D_ARRAY = 11,
  TEXTURE_SHADOW2D_ARRAY = 12
};
138
139 char R600CodeEmitter::ID = 0;
140
141 FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
142 return new R600CodeEmitter(OS);
143 }
144
145 bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
146
147 TM = &MF.getTarget();
148 MRI = &MF.getRegInfo();
149 TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
150 TII = static_cast<const R600InstrInfo *>(TM->getInstrInfo());
151 const AMDGPUSubtarget &STM = TM->getSubtarget<AMDGPUSubtarget>();
152 std::string gpu = STM.getDeviceName();
153
154 if (STM.dumpCode()) {
155 MF.dump();
156 }
157
158 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
159 BB != BB_E; ++BB) {
160 MachineBasicBlock &MBB = *BB;
161 for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
162 E = MBB.instr_end(); I != E; ++I) {
163 MachineInstr &MI = *I;
164 IsVector = TII->isVector(MI);
165 IsCube = TII->isCubeOp(MI.getOpcode());
166 if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
167 continue;
168 }
169 if (TII->isTexOp(MI.getOpcode())) {
170 EmitTexInstr(MI);
171 } else if (TII->isFCOp(MI.getOpcode())){
172 EmitFCInstr(MI);
173 } else if (IsVector || IsCube) {
174 IsLast = false;
175 // XXX: On Cayman, some (all?) of the vector instructions only need
176 // to fill the first three slots.
177 for (currentElement = 0; currentElement < 4; currentElement++) {
178 IsLast = (currentElement == 3);
179 EmitALUInstr(MI);
180 }
181 IsVector = false;
182 IsCube = false;
183 } else if (MI.getOpcode() == AMDGPU::RETURN ||
184 MI.getOpcode() == AMDGPU::BUNDLE ||
185 MI.getOpcode() == AMDGPU::KILL) {
186 continue;
187 } else {
188 switch(MI.getOpcode()) {
189 case AMDGPU::RAT_WRITE_CACHELESS_eg:
190 {
191 uint64_t inst = getBinaryCodeForInstr(MI);
192 // Set End Of Program bit
193 // XXX: Need better check of end of program. EOP should be
194 // encoded in one of the operands of the MI, and it should be
195 // set in a prior pass.
196 MachineBasicBlock::iterator NextI = llvm::next(I);
197 MachineInstr &NextMI = *NextI;
198 if (NextMI.getOpcode() == AMDGPU::RETURN) {
199 inst |= (((uint64_t)1) << 53);
200 }
201 EmitByte(INSTR_NATIVE);
202 Emit(inst);
203 break;
204 }
205 case AMDGPU::VTX_READ_PARAM_i32_eg:
206 case AMDGPU::VTX_READ_PARAM_f32_eg:
207 case AMDGPU::VTX_READ_GLOBAL_i32_eg:
208 case AMDGPU::VTX_READ_GLOBAL_f32_eg:
209 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
210 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
211 {
212 uint64_t InstWord01 = getBinaryCodeForInstr(MI);
213 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
214
215 EmitByte(INSTR_VTX);
216 Emit(InstWord01);
217 Emit(InstWord2);
218 break;
219 }
220
221 default:
222 EmitALUInstr(MI);
223 break;
224 }
225 }
226 }
227 }
228 return false;
229 }
230
231 void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
232 {
233
234 unsigned numOperands = MI.getNumExplicitOperands();
235 if(MI.findFirstPredOperandIdx() > -1)
236 numOperands--;
237
238 // Some instructions are just place holder instructions that represent
239 // operations that the GPU does automatically. They should be ignored.
240 if (TII->isPlaceHolderOpcode(MI.getOpcode())) {
241 return;
242 }
243
244 if(MI.getOpcode() == AMDGPU::PRED_X)
245 numOperands = 2;
246
247 // XXX Check if instruction writes a result
248 if (numOperands < 1) {
249 return;
250 }
251 const MachineOperand dstOp = MI.getOperand(0);
252
253 // Emit instruction type
254 EmitByte(0);
255
256 if (IsCube) {
257 static const int cube_src_swz[] = {2, 2, 0, 1};
258 EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
259 EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
260 EmitNullBytes(SRC_BYTE_COUNT);
261 } else {
262 unsigned int opIndex;
263 for (opIndex = 1; opIndex < numOperands; opIndex++) {
264 // Literal constants are always stored as the last operand.
265 if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
266 break;
267 }
268 EmitSrc(MI.getOperand(opIndex));
269 }
270
271 // Emit zeros for unused sources
272 for ( ; opIndex < 4; opIndex++) {
273 EmitNullBytes(SRC_BYTE_COUNT);
274 }
275 }
276
277 EmitDst(dstOp);
278
279 EmitALU(MI, numOperands - 1);
280 }
281
282 void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
283 {
284 uint32_t value = 0;
285 // Emit the source select (2 bytes). For GPRs, this is the register index.
286 // For other potential instruction operands, (e.g. constant registers) the
287 // value of the source select is defined in the r600isa docs.
288 if (MO.isReg()) {
289 unsigned reg = MO.getReg();
290 EmitTwoBytes(getHWReg(reg));
291 if (reg == AMDGPU::ALU_LITERAL_X) {
292 const MachineInstr * parent = MO.getParent();
293 unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
294 MachineOperand immOp = parent->getOperand(immOpIndex);
295 if (immOp.isFPImm()) {
296 value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
297 } else {
298 assert(immOp.isImm());
299 value = immOp.getImm();
300 }
301 }
302 } else {
303 // XXX: Handle other operand types.
304 EmitTwoBytes(0);
305 }
306
307 // Emit the source channel (1 byte)
308 if (chan_override != -1) {
309 EmitByte(chan_override);
310 } else if (MO.isReg()) {
311 EmitByte(TRI->getHWRegChan(MO.getReg()));
312 } else {
313 EmitByte(0);
314 }
315
316 // XXX: Emit isNegated (1 byte)
317 if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
318 && (MO.getTargetFlags() & MO_FLAG_NEG ||
319 (MO.isReg() &&
320 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
321 EmitByte(1);
322 } else {
323 EmitByte(0);
324 }
325
326 // Emit isAbsolute (1 byte)
327 if (MO.getTargetFlags() & MO_FLAG_ABS) {
328 EmitByte(1);
329 } else {
330 EmitByte(0);
331 }
332
333 // XXX: Emit relative addressing mode (1 byte)
334 EmitByte(0);
335
336 // Emit kc_bank, This will be adjusted later by r600_asm
337 EmitByte(0);
338
339 // Emit the literal value, if applicable (4 bytes).
340 Emit(value);
341
342 }
343
344 void R600CodeEmitter::EmitDst(const MachineOperand & MO)
345 {
346 if (MO.isReg() && MO.getReg() != AMDGPU::PREDICATE_BIT) {
347 // Emit the destination register index (1 byte)
348 EmitByte(getHWReg(MO.getReg()));
349
350 // Emit the element of the destination register (1 byte)
351 if (IsCube || IsVector) {
352 EmitByte(currentElement);
353 } else {
354 EmitByte(TRI->getHWRegChan(MO.getReg()));
355 }
356
357 // Emit isClamped (1 byte)
358 if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
359 EmitByte(1);
360 } else {
361 EmitByte(0);
362 }
363
364 // Emit writemask (1 byte).
365 if ((IsVector &&
366 currentElement != TRI->getHWRegChan(MO.getReg()))
367 || MO.getTargetFlags() & MO_FLAG_MASK) {
368 EmitByte(0);
369 } else {
370 EmitByte(1);
371 }
372
373 // XXX: Emit relative addressing mode
374 EmitByte(0);
375 } else {
376 // XXX: Handle other operand types. Are there any for destination regs?
377 EmitNullBytes(DST_BYTE_COUNT);
378 }
379 }
380
381 void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
382 {
383 // Emit the instruction (2 bytes)
384 EmitTwoBytes(getBinaryCodeForInstr(MI));
385
386 // Emit IsLast (for this instruction group) (1 byte)
387 if (!IsLast ||
388 (MI.isInsideBundle() &&
389 !(MI.getOperand(0).getTargetFlags() & MO_FLAG_LAST))) {
390 EmitByte(0);
391 } else {
392 EmitByte(1);
393 }
394
395 // Emit isOp3 (1 byte)
396 if (numSrc == 3) {
397 EmitByte(1);
398 } else {
399 EmitByte(0);
400 }
401
402 // XXX: Emit push modifier
403 if(MI.getOperand(1).getTargetFlags() & MO_FLAG_PUSH) {
404 EmitByte(1);
405 } else {
406 EmitByte(0);
407 }
408
409 // XXX: Emit predicate (1 byte)
410 int predidx = MI.findFirstPredOperandIdx();
411 if (predidx > -1)
412 switch(MI.getOperand(predidx).getReg()) {
413 case AMDGPU::PRED_SEL_ZERO:
414 EmitByte(2);
415 break;
416 case AMDGPU::PRED_SEL_ONE:
417 EmitByte(3);
418 break;
419 default:
420 EmitByte(0);
421 break;
422 }
423 else {
424 EmitByte(0);
425 }
426
427
428 // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
429 // r600_asm.c sets it.
430 EmitByte(0);
431
432 // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
433 EmitByte(0);
434
435 // XXX: Emit OMOD (1 byte) Not implemented.
436 EmitByte(0);
437
438 // XXX: Emit index_mode. I think this is for indirect addressing, so we
439 // don't need to worry about it.
440 EmitByte(0);
441 }
442
443 void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
444 {
445
446 unsigned opcode = MI.getOpcode();
447 bool hasOffsets = (opcode == AMDGPU::TEX_LD);
448 unsigned op_offset = hasOffsets ? 3 : 0;
449 int64_t sampler = MI.getOperand(op_offset+2).getImm();
450 int64_t textureType = MI.getOperand(op_offset+3).getImm();
451 unsigned srcSelect[4] = {0, 1, 2, 3};
452
453 // Emit instruction type
454 EmitByte(1);
455
456 // Emit instruction
457 EmitByte(getBinaryCodeForInstr(MI));
458
459 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
460 EmitByte(sampler + 1 + 1);
461
462 // Emit source register
463 EmitByte(getHWReg(MI.getOperand(1).getReg()));
464
465 // XXX: Emit src isRelativeAddress
466 EmitByte(0);
467
468 // Emit destination register
469 EmitByte(getHWReg(MI.getOperand(0).getReg()));
470
471 // XXX: Emit dst isRealtiveAddress
472 EmitByte(0);
473
474 // XXX: Emit dst select
475 EmitByte(0); // X
476 EmitByte(1); // Y
477 EmitByte(2); // Z
478 EmitByte(3); // W
479
480 // XXX: Emit lod bias
481 EmitByte(0);
482
483 // XXX: Emit coord types
484 unsigned coordType[4] = {1, 1, 1, 1};
485
486 if (textureType == TEXTURE_RECT
487 || textureType == TEXTURE_SHADOWRECT) {
488 coordType[ELEMENT_X] = 0;
489 coordType[ELEMENT_Y] = 0;
490 }
491
492 if (textureType == TEXTURE_1D_ARRAY
493 || textureType == TEXTURE_SHADOW1D_ARRAY) {
494 if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
495 coordType[ELEMENT_Y] = 0;
496 } else {
497 coordType[ELEMENT_Z] = 0;
498 srcSelect[ELEMENT_Z] = ELEMENT_Y;
499 }
500 } else if (textureType == TEXTURE_2D_ARRAY
501 || textureType == TEXTURE_SHADOW2D_ARRAY) {
502 coordType[ELEMENT_Z] = 0;
503 }
504
505 for (unsigned i = 0; i < 4; i++) {
506 EmitByte(coordType[i]);
507 }
508
509 // XXX: Emit offsets
510 if (hasOffsets)
511 for (unsigned i = 2; i < 5; i++)
512 EmitByte(MI.getOperand(i).getImm()<<1);
513 else
514 EmitNullBytes(3);
515
516 // Emit sampler id
517 EmitByte(sampler);
518
519 // XXX:Emit source select
520 if ((textureType == TEXTURE_SHADOW1D
521 || textureType == TEXTURE_SHADOW2D
522 || textureType == TEXTURE_SHADOWRECT
523 || textureType == TEXTURE_SHADOW1D_ARRAY)
524 && opcode != AMDGPU::TEX_SAMPLE_C_L
525 && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
526 srcSelect[ELEMENT_W] = ELEMENT_Z;
527 }
528
529 for (unsigned i = 0; i < 4; i++) {
530 EmitByte(srcSelect[i]);
531 }
532 }
533
534 void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
535 {
536 // Emit instruction type
537 EmitByte(INSTR_FC);
538
539 // Emit SRC
540 unsigned numOperands = MI.getNumOperands();
541 if (numOperands > 0) {
542 assert(numOperands == 1);
543 EmitSrc(MI.getOperand(0));
544 } else {
545 EmitNullBytes(SRC_BYTE_COUNT);
546 }
547
548 // Emit FC Instruction
549 enum FCInstr instr;
550 switch (MI.getOpcode()) {
551 case AMDGPU::BREAK_LOGICALZ_f32:
552 instr = FC_BREAK;
553 break;
554 case AMDGPU::BREAK_LOGICALNZ_f32:
555 instr = FC_BREAK_NZ;
556 break;
557 case AMDGPU::BREAK_LOGICALNZ_i32:
558 instr = FC_BREAK_NZ_INT;
559 break;
560 case AMDGPU::BREAK_LOGICALZ_i32:
561 instr = FC_BREAK_Z_INT;
562 break;
563 case AMDGPU::CONTINUE_LOGICALNZ_f32:
564 case AMDGPU::CONTINUE_LOGICALNZ_i32:
565 instr = FC_CONTINUE;
566 break;
567 case AMDGPU::IF_LOGICALNZ_f32:
568 instr = FC_IF;
569 case AMDGPU::IF_LOGICALNZ_i32:
570 instr = FC_IF_INT;
571 break;
572 case AMDGPU::IF_LOGICALZ_f32:
573 abort();
574 break;
575 case AMDGPU::ELSE:
576 instr = FC_ELSE;
577 break;
578 case AMDGPU::ENDIF:
579 instr = FC_ENDIF;
580 break;
581 case AMDGPU::ENDLOOP:
582 instr = FC_ENDLOOP;
583 break;
584 case AMDGPU::WHILELOOP:
585 instr = FC_BGNLOOP;
586 break;
587 default:
588 abort();
589 break;
590 }
591 EmitByte(instr);
592 }
593
594 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
595 {
596 for (unsigned int i = 0; i < byteCount; i++) {
597 EmitByte(0);
598 }
599 }
600
601 void R600CodeEmitter::EmitByte(unsigned int byte)
602 {
603 _OS.write((uint8_t) byte & 0xff);
604 }
605 void R600CodeEmitter::EmitTwoBytes(unsigned int bytes)
606 {
607 _OS.write((uint8_t) (bytes & 0xff));
608 _OS.write((uint8_t) ((bytes >> 8) & 0xff));
609 }
610
611 void R600CodeEmitter::Emit(uint32_t value)
612 {
613 for (unsigned i = 0; i < 4; i++) {
614 _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
615 }
616 }
617
618 void R600CodeEmitter::Emit(uint64_t value)
619 {
620 for (unsigned i = 0; i < 8; i++) {
621 EmitByte((value >> (8 * i)) & 0xff);
622 }
623 }
624
625 unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
626 {
627 unsigned HWReg;
628
629 HWReg = TRI->getHWRegIndex(regNo);
630 if (AMDGPU::R600_CReg32RegClass.contains(regNo)) {
631 HWReg += 512;
632 }
633 return HWReg;
634 }
635
636 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
637 const MachineOperand &MO) const
638 {
639 if (MO.isReg()) {
640 return getHWReg(MO.getReg());
641 } else {
642 return MO.getImm();
643 }
644 }
645
646 #include "AMDGPUGenCodeEmitter.inc"
647