radeon/llvm: Move lowering of SETCC node to R600ISelLowering
[mesa.git] / src / gallium / drivers / radeon / R600CodeEmitter.cpp
1 //===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This code emitters outputs bytecode that is understood by the r600g driver
11 // in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
12 // except that the size of the instruction fields are rounded up to the
13 // nearest byte.
14 //
15 // [1] http://www.mesa3d.org/
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "AMDGPU.h"
20 #include "AMDGPUUtil.h"
21 #include "AMDILCodeEmitter.h"
22 #include "AMDILInstrInfo.h"
23 #include "AMDILUtilityFunctions.h"
24 #include "R600InstrInfo.h"
25 #include "R600RegisterInfo.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Support/FormattedStream.h"
31 #include "llvm/Target/TargetMachine.h"
32
33 #include <stdio.h>
34
35 #define SRC_BYTE_COUNT 11
36 #define DST_BYTE_COUNT 5
37
38 using namespace llvm;
39
40 namespace {
41
42 class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
43
44 private:
45
46 static char ID;
47 formatted_raw_ostream &_OS;
48 const TargetMachine * TM;
49 const MachineRegisterInfo * MRI;
50 const R600RegisterInfo * TRI;
51
52 bool IsCube;
53 bool IsReduction;
54 bool IsVector;
55 unsigned currentElement;
56 bool IsLast;
57
58 unsigned section_start;
59
60 public:
61
62 R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
63 _OS(OS), TM(NULL), IsCube(false), IsReduction(false), IsVector(false),
64 IsLast(true) { }
65
66 const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
67
68 bool runOnMachineFunction(MachineFunction &MF);
69 virtual uint64_t getMachineOpValue(const MachineInstr &MI,
70 const MachineOperand &MO) const;
71
72 private:
73
74 void EmitALUInstr(MachineInstr &MI);
75 void EmitSrc(const MachineOperand & MO, int chan_override = -1);
76 void EmitDst(const MachineOperand & MO);
77 void EmitALU(MachineInstr &MI, unsigned numSrc);
78 void EmitTexInstr(MachineInstr &MI);
79 void EmitFCInstr(MachineInstr &MI);
80
81 void EmitNullBytes(unsigned int byteCount);
82
83 void EmitByte(unsigned int byte);
84
85 void EmitTwoBytes(uint32_t bytes);
86
87 void Emit(uint32_t value);
88 void Emit(uint64_t value);
89
90 unsigned getHWReg(unsigned regNo) const;
91
92 };
93
94 } // End anonymous namespace
95
// Channel index within a 4-component GPR (x, y, z, w).
enum RegElement {
  ELEMENT_X = 0,
  ELEMENT_Y,
  ELEMENT_Z,
  ELEMENT_W
};

// Leading byte tagging each instruction record in the bytecode stream,
// telling the r600g parser which decoder to apply to the bytes that follow.
enum InstrTypes {
  INSTR_ALU = 0,
  INSTR_TEX,
  INSTR_FC,
  INSTR_NATIVE,
  INSTR_VTX
};

// Flow-control opcodes; EmitFCInstr writes one of these as the byte
// following the (optional) source operand.  Values must stay in sync with
// the r600g bytecode reader.
enum FCInstr {
  FC_IF = 0,
  FC_IF_INT,
  FC_ELSE,
  FC_ENDIF,
  FC_BGNLOOP,
  FC_ENDLOOP,
  FC_BREAK,
  FC_BREAK_NZ_INT,
  FC_CONTINUE,
  FC_BREAK_Z_INT
};

// Texture target kinds, matched against the texture-type immediate operand
// of TEX instructions in EmitTexInstr.
// NOTE(review): numbering starts at 1, not 0 -- presumably mirroring the
// Mesa-side texture-target enum; confirm against the state tracker before
// reusing these values elsewhere.
enum TextureTypes {
  TEXTURE_1D = 1,
  TEXTURE_2D,
  TEXTURE_3D,
  TEXTURE_CUBE,
  TEXTURE_RECT,
  TEXTURE_SHADOW1D,
  TEXTURE_SHADOW2D,
  TEXTURE_SHADOWRECT,
  TEXTURE_1D_ARRAY,
  TEXTURE_2D_ARRAY,
  TEXTURE_SHADOW1D_ARRAY,
  TEXTURE_SHADOW2D_ARRAY
};
138
char R600CodeEmitter::ID = 0;

// Factory used by the target to create the emitter pass; bytecode for each
// machine function is written to OS as the pass runs.
FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) {
  return new R600CodeEmitter(OS);
}
144
145 bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
146
147 TM = &MF.getTarget();
148 MRI = &MF.getRegInfo();
149 TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo());
150 const R600InstrInfo * TII = static_cast<const R600InstrInfo *>(TM->getInstrInfo());
151 const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
152 std::string gpu = STM.getDeviceName();
153
154 if (STM.dumpCode()) {
155 MF.dump();
156 }
157
158 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
159 BB != BB_E; ++BB) {
160 MachineBasicBlock &MBB = *BB;
161 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
162 I != E; ++I) {
163 MachineInstr &MI = *I;
164 IsReduction = AMDGPU::isReductionOp(MI.getOpcode());
165 IsVector = TII->isVector(MI);
166 IsCube = AMDGPU::isCubeOp(MI.getOpcode());
167 if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
168 continue;
169 }
170 if (AMDGPU::isTexOp(MI.getOpcode())) {
171 EmitTexInstr(MI);
172 } else if (AMDGPU::isFCOp(MI.getOpcode())){
173 EmitFCInstr(MI);
174 } else if (IsReduction || IsVector || IsCube) {
175 IsLast = false;
176 // XXX: On Cayman, some (all?) of the vector instructions only need
177 // to fill the first three slots.
178 for (currentElement = 0; currentElement < 4; currentElement++) {
179 IsLast = (currentElement == 3);
180 EmitALUInstr(MI);
181 }
182 IsReduction = false;
183 IsVector = false;
184 IsCube = false;
185 } else if (MI.getOpcode() == AMDGPU::RETURN ||
186 MI.getOpcode() == AMDGPU::BUNDLE ||
187 MI.getOpcode() == AMDGPU::KILL) {
188 continue;
189 } else {
190 switch(MI.getOpcode()) {
191 case AMDGPU::RAT_WRITE_CACHELESS_eg:
192 {
193 uint64_t inst = getBinaryCodeForInstr(MI);
194 // Set End Of Program bit
195 // XXX: Need better check of end of program. EOP should be
196 // encoded in one of the operands of the MI, and it should be
197 // set in a prior pass.
198 MachineBasicBlock::iterator NextI = llvm::next(I);
199 MachineInstr &NextMI = *NextI;
200 if (NextMI.getOpcode() == AMDGPU::RETURN) {
201 inst |= (((uint64_t)1) << 53);
202 }
203 EmitByte(INSTR_NATIVE);
204 Emit(inst);
205 break;
206 }
207 case AMDGPU::VTX_READ_PARAM_i32_eg:
208 case AMDGPU::VTX_READ_PARAM_f32_eg:
209 case AMDGPU::VTX_READ_GLOBAL_i32_eg:
210 case AMDGPU::VTX_READ_GLOBAL_f32_eg:
211 case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
212 case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
213 {
214 uint64_t InstWord01 = getBinaryCodeForInstr(MI);
215 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
216
217 EmitByte(INSTR_VTX);
218 Emit(InstWord01);
219 Emit(InstWord2);
220 break;
221 }
222
223 default:
224 EmitALUInstr(MI);
225 break;
226 }
227 }
228 }
229 }
230 return false;
231 }
232
233 void R600CodeEmitter::EmitALUInstr(MachineInstr &MI)
234 {
235
236 unsigned numOperands = MI.getNumExplicitOperands();
237
238 // Some instructions are just place holder instructions that represent
239 // operations that the GPU does automatically. They should be ignored.
240 if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
241 return;
242 }
243
244 // XXX Check if instruction writes a result
245 if (numOperands < 1) {
246 return;
247 }
248 const MachineOperand dstOp = MI.getOperand(0);
249
250 // Emit instruction type
251 EmitByte(0);
252
253 if (IsCube) {
254 static const int cube_src_swz[] = {2, 2, 0, 1};
255 EmitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
256 EmitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
257 EmitNullBytes(SRC_BYTE_COUNT);
258 } else {
259 unsigned int opIndex;
260 for (opIndex = 1; opIndex < numOperands; opIndex++) {
261 // Literal constants are always stored as the last operand.
262 if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
263 break;
264 }
265 EmitSrc(MI.getOperand(opIndex));
266 }
267
268 // Emit zeros for unused sources
269 for ( ; opIndex < 4; opIndex++) {
270 EmitNullBytes(SRC_BYTE_COUNT);
271 }
272 }
273
274 EmitDst(dstOp);
275
276 EmitALU(MI, numOperands - 1);
277 }
278
279 void R600CodeEmitter::EmitSrc(const MachineOperand & MO, int chan_override)
280 {
281 uint32_t value = 0;
282 // Emit the source select (2 bytes). For GPRs, this is the register index.
283 // For other potential instruction operands, (e.g. constant registers) the
284 // value of the source select is defined in the r600isa docs.
285 if (MO.isReg()) {
286 unsigned reg = MO.getReg();
287 EmitTwoBytes(getHWReg(reg));
288 if (reg == AMDGPU::ALU_LITERAL_X) {
289 const MachineInstr * parent = MO.getParent();
290 unsigned immOpIndex = parent->getNumExplicitOperands() - 1;
291 MachineOperand immOp = parent->getOperand(immOpIndex);
292 if (immOp.isFPImm()) {
293 value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue();
294 } else {
295 assert(immOp.isImm());
296 value = immOp.getImm();
297 }
298 }
299 } else {
300 // XXX: Handle other operand types.
301 EmitTwoBytes(0);
302 }
303
304 // Emit the source channel (1 byte)
305 if (chan_override != -1) {
306 EmitByte(chan_override);
307 } else if (IsReduction) {
308 EmitByte(currentElement);
309 } else if (MO.isReg()) {
310 EmitByte(TRI->getHWRegChan(MO.getReg()));
311 } else {
312 EmitByte(0);
313 }
314
315 // XXX: Emit isNegated (1 byte)
316 if ((!(MO.getTargetFlags() & MO_FLAG_ABS))
317 && (MO.getTargetFlags() & MO_FLAG_NEG ||
318 (MO.isReg() &&
319 (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
320 EmitByte(1);
321 } else {
322 EmitByte(0);
323 }
324
325 // Emit isAbsolute (1 byte)
326 if (MO.getTargetFlags() & MO_FLAG_ABS) {
327 EmitByte(1);
328 } else {
329 EmitByte(0);
330 }
331
332 // XXX: Emit relative addressing mode (1 byte)
333 EmitByte(0);
334
335 // Emit kc_bank, This will be adjusted later by r600_asm
336 EmitByte(0);
337
338 // Emit the literal value, if applicable (4 bytes).
339 Emit(value);
340
341 }
342
343 void R600CodeEmitter::EmitDst(const MachineOperand & MO)
344 {
345 if (MO.isReg()) {
346 // Emit the destination register index (1 byte)
347 EmitByte(getHWReg(MO.getReg()));
348
349 // Emit the element of the destination register (1 byte)
350 if (IsReduction || IsCube || IsVector) {
351 EmitByte(currentElement);
352 } else {
353 EmitByte(TRI->getHWRegChan(MO.getReg()));
354 }
355
356 // Emit isClamped (1 byte)
357 if (MO.getTargetFlags() & MO_FLAG_CLAMP) {
358 EmitByte(1);
359 } else {
360 EmitByte(0);
361 }
362
363 // Emit writemask (1 byte).
364 if (((IsReduction || IsVector) &&
365 currentElement != TRI->getHWRegChan(MO.getReg()))
366 || MO.getTargetFlags() & MO_FLAG_MASK) {
367 EmitByte(0);
368 } else {
369 EmitByte(1);
370 }
371
372 // XXX: Emit relative addressing mode
373 EmitByte(0);
374 } else {
375 // XXX: Handle other operand types. Are there any for destination regs?
376 EmitNullBytes(DST_BYTE_COUNT);
377 }
378 }
379
380 void R600CodeEmitter::EmitALU(MachineInstr &MI, unsigned numSrc)
381 {
382 // Emit the instruction (2 bytes)
383 EmitTwoBytes(getBinaryCodeForInstr(MI));
384
385 // Emit IsLast (for this instruction group) (1 byte)
386 if (IsLast) {
387 EmitByte(1);
388 } else {
389 EmitByte(0);
390 }
391 // Emit isOp3 (1 byte)
392 if (numSrc == 3) {
393 EmitByte(1);
394 } else {
395 EmitByte(0);
396 }
397
398 // XXX: Emit predicate (1 byte)
399 EmitByte(0);
400
401 // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like
402 // r600_asm.c sets it.
403 EmitByte(0);
404
405 // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for.
406 EmitByte(0);
407
408 // XXX: Emit OMOD (1 byte) Not implemented.
409 EmitByte(0);
410
411 // XXX: Emit index_mode. I think this is for indirect addressing, so we
412 // don't need to worry about it.
413 EmitByte(0);
414 }
415
416 void R600CodeEmitter::EmitTexInstr(MachineInstr &MI)
417 {
418
419 unsigned opcode = MI.getOpcode();
420 bool hasOffsets = (opcode == AMDGPU::TEX_LD);
421 unsigned op_offset = hasOffsets ? 3 : 0;
422 int64_t sampler = MI.getOperand(op_offset+2).getImm();
423 int64_t textureType = MI.getOperand(op_offset+3).getImm();
424 unsigned srcSelect[4] = {0, 1, 2, 3};
425
426 // Emit instruction type
427 EmitByte(1);
428
429 // Emit instruction
430 EmitByte(getBinaryCodeForInstr(MI));
431
432 // XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
433 EmitByte(sampler + 1 + 1);
434
435 // Emit source register
436 EmitByte(getHWReg(MI.getOperand(1).getReg()));
437
438 // XXX: Emit src isRelativeAddress
439 EmitByte(0);
440
441 // Emit destination register
442 EmitByte(getHWReg(MI.getOperand(0).getReg()));
443
444 // XXX: Emit dst isRealtiveAddress
445 EmitByte(0);
446
447 // XXX: Emit dst select
448 EmitByte(0); // X
449 EmitByte(1); // Y
450 EmitByte(2); // Z
451 EmitByte(3); // W
452
453 // XXX: Emit lod bias
454 EmitByte(0);
455
456 // XXX: Emit coord types
457 unsigned coordType[4] = {1, 1, 1, 1};
458
459 if (textureType == TEXTURE_RECT
460 || textureType == TEXTURE_SHADOWRECT) {
461 coordType[ELEMENT_X] = 0;
462 coordType[ELEMENT_Y] = 0;
463 }
464
465 if (textureType == TEXTURE_1D_ARRAY
466 || textureType == TEXTURE_SHADOW1D_ARRAY) {
467 if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
468 coordType[ELEMENT_Y] = 0;
469 } else {
470 coordType[ELEMENT_Z] = 0;
471 srcSelect[ELEMENT_Z] = ELEMENT_Y;
472 }
473 } else if (textureType == TEXTURE_2D_ARRAY
474 || textureType == TEXTURE_SHADOW2D_ARRAY) {
475 coordType[ELEMENT_Z] = 0;
476 }
477
478 for (unsigned i = 0; i < 4; i++) {
479 EmitByte(coordType[i]);
480 }
481
482 // XXX: Emit offsets
483 if (hasOffsets)
484 for (unsigned i = 2; i < 5; i++)
485 EmitByte(MI.getOperand(i).getImm()<<1);
486 else
487 EmitNullBytes(3);
488
489 // Emit sampler id
490 EmitByte(sampler);
491
492 // XXX:Emit source select
493 if ((textureType == TEXTURE_SHADOW1D
494 || textureType == TEXTURE_SHADOW2D
495 || textureType == TEXTURE_SHADOWRECT
496 || textureType == TEXTURE_SHADOW1D_ARRAY)
497 && opcode != AMDGPU::TEX_SAMPLE_C_L
498 && opcode != AMDGPU::TEX_SAMPLE_C_LB) {
499 srcSelect[ELEMENT_W] = ELEMENT_Z;
500 }
501
502 for (unsigned i = 0; i < 4; i++) {
503 EmitByte(srcSelect[i]);
504 }
505 }
506
507 void R600CodeEmitter::EmitFCInstr(MachineInstr &MI)
508 {
509 // Emit instruction type
510 EmitByte(INSTR_FC);
511
512 // Emit SRC
513 unsigned numOperands = MI.getNumOperands();
514 if (numOperands > 0) {
515 assert(numOperands == 1);
516 EmitSrc(MI.getOperand(0));
517 } else {
518 EmitNullBytes(SRC_BYTE_COUNT);
519 }
520
521 // Emit FC Instruction
522 enum FCInstr instr;
523 switch (MI.getOpcode()) {
524 case AMDGPU::BREAK_LOGICALZ_f32:
525 instr = FC_BREAK;
526 break;
527 case AMDGPU::BREAK_LOGICALNZ_f32:
528 case AMDGPU::BREAK_LOGICALNZ_i32:
529 instr = FC_BREAK_NZ_INT;
530 break;
531 case AMDGPU::BREAK_LOGICALZ_i32:
532 instr = FC_BREAK_Z_INT;
533 break;
534 case AMDGPU::CONTINUE_LOGICALNZ_f32:
535 case AMDGPU::CONTINUE_LOGICALNZ_i32:
536 instr = FC_CONTINUE;
537 break;
538 case AMDGPU::IF_LOGICALNZ_f32:
539 instr = FC_IF;
540 case AMDGPU::IF_LOGICALNZ_i32:
541 instr = FC_IF_INT;
542 break;
543 case AMDGPU::IF_LOGICALZ_f32:
544 abort();
545 break;
546 case AMDGPU::ELSE:
547 instr = FC_ELSE;
548 break;
549 case AMDGPU::ENDIF:
550 instr = FC_ENDIF;
551 break;
552 case AMDGPU::ENDLOOP:
553 instr = FC_ENDLOOP;
554 break;
555 case AMDGPU::WHILELOOP:
556 instr = FC_BGNLOOP;
557 break;
558 default:
559 abort();
560 break;
561 }
562 EmitByte(instr);
563 }
564
565 void R600CodeEmitter::EmitNullBytes(unsigned int byteCount)
566 {
567 for (unsigned int i = 0; i < byteCount; i++) {
568 EmitByte(0);
569 }
570 }
571
572 void R600CodeEmitter::EmitByte(unsigned int byte)
573 {
574 _OS.write((uint8_t) byte & 0xff);
575 }
576 void R600CodeEmitter::EmitTwoBytes(unsigned int bytes)
577 {
578 _OS.write((uint8_t) (bytes & 0xff));
579 _OS.write((uint8_t) ((bytes >> 8) & 0xff));
580 }
581
582 void R600CodeEmitter::Emit(uint32_t value)
583 {
584 for (unsigned i = 0; i < 4; i++) {
585 _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
586 }
587 }
588
589 void R600CodeEmitter::Emit(uint64_t value)
590 {
591 for (unsigned i = 0; i < 8; i++) {
592 EmitByte((value >> (8 * i)) & 0xff);
593 }
594 }
595
596 unsigned R600CodeEmitter::getHWReg(unsigned regNo) const
597 {
598 unsigned HWReg;
599
600 HWReg = TRI->getHWRegIndex(regNo);
601 if (AMDGPU::R600_CReg32RegClass.contains(regNo)) {
602 HWReg += 512;
603 }
604 return HWReg;
605 }
606
607 uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI,
608 const MachineOperand &MO) const
609 {
610 if (MO.isReg()) {
611 return getHWReg(MO.getReg());
612 } else {
613 return MO.getImm();
614 }
615 }
616
617 #include "AMDGPUGenCodeEmitter.inc"
618