nvc0: add support for PIPE_CAP_SAMPLE_SHADING
[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir.h
1 /*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #ifndef __NV50_IR_H__
24 #define __NV50_IR_H__
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stdint.h>
29 #include <deque>
30 #include <list>
31 #include <vector>
32
33 #include "codegen/nv50_ir_util.h"
34 #include "codegen/nv50_ir_graph.h"
35
36 #include "codegen/nv50_ir_driver.h"
37
38 namespace nv50_ir {
39
// Opcodes of the nv50 IR.
// Enumerator values are implicit, so they follow declaration order; OP_LAST
// is the final enumerator and therefore equals the number of opcodes.
// Opcodes declared before OP_MOV are what Instruction::isPseudo() reports as
// pseudo instructions (it tests op < OP_MOV).
enum operation
{
   OP_NOP = 0,
   OP_PHI,
   OP_UNION, // unify a new definition and several source values
   OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
   OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
   OP_CONSTRAINT, // copy values into consecutive registers
   OP_MOV, // simple copy, no modifiers allowed
   OP_LOAD,
   OP_STORE,
   OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
   OP_SUB,
   OP_MUL,
   OP_DIV,
   OP_MOD,
   OP_MAD,
   OP_FMA,
   OP_SAD, // abs(src0 - src1) + src2
   OP_ABS,
   OP_NEG,
   OP_NOT,
   OP_AND,
   OP_OR,
   OP_XOR,
   OP_SHL,
   OP_SHR,
   OP_MAX,
   OP_MIN,
   OP_SAT, // CLAMP(f32, 0.0, 1.0)
   OP_CEIL,
   OP_FLOOR,
   OP_TRUNC,
   OP_CVT,
   OP_SET_AND, // dst = (src0 CMP src1) & src2
   OP_SET_OR,
   OP_SET_XOR,
   OP_SET,
   OP_SELP, // dst = src2 ? src0 : src1
   OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
   OP_RCP,
   OP_RSQ,
   OP_LG2,
   OP_SIN,
   OP_COS,
   OP_EX2,
   OP_EXP, // exponential (base M_E)
   OP_LOG, // natural logarithm
   OP_PRESIN,
   OP_PREEX2,
   OP_SQRT,
   OP_POW,
   OP_BRA,
   OP_CALL,
   OP_RET,
   OP_CONT,
   OP_BREAK,
   OP_PRERET,
   OP_PRECONT,
   OP_PREBREAK,
   OP_BRKPT, // breakpoint (not related to loops)
   OP_JOINAT, // push control flow convergence point
   OP_JOIN, // converge
   OP_DISCARD,
   OP_EXIT,
   OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
   OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
   OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
   OP_EXPORT,
   OP_LINTERP,
   OP_PINTERP,
   OP_EMIT, // emit vertex
   OP_RESTART, // restart primitive
   OP_TEX,
   OP_TXB, // texture bias
   OP_TXL, // texture lod
   OP_TXF, // texel fetch
   OP_TXQ, // texture size query
   OP_TXD, // texture derivatives
   OP_TXG, // texture gather
   OP_TXLQ, // texture query lod
   OP_TEXCSAA, // texture op for coverage sampling
   OP_TEXPREP, // turn cube map array into 2d array coordinates
   OP_SULDB, // surface load (raw)
   OP_SULDP, // surface load (formatted)
   OP_SUSTB, // surface store (raw)
   OP_SUSTP, // surface store (formatted)
   OP_SUREDB,
   OP_SUREDP, // surface reduction (atomic op)
   OP_SULEA, // surface load effective address
   OP_SUBFM, // surface bitfield manipulation
   OP_SUCLAMP, // clamp surface coordinates
   OP_SUEAU, // surface effective address
   OP_MADSP, // special integer multiply-add
   OP_TEXBAR, // texture dependency barrier
   OP_DFDX,
   OP_DFDY,
   OP_RDSV, // read system value
   OP_WRSV, // write system value
   OP_PIXLD, // get info about raster object or surfaces
   OP_QUADOP,
   OP_QUADON,
   OP_QUADPOP,
   OP_POPCNT, // bitcount(src0 & src1)
   OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
   OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
   OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
   OP_ATOM,
   OP_BAR, // execution barrier, sources = { id, thread count, predicate }
   OP_VADD, // byte/word vector operations
   OP_VAVG,
   OP_VMIN,
   OP_VMAX,
   OP_VSAD,
   OP_VSET,
   OP_VSHR,
   OP_VSHL,
   OP_VSEL,
   OP_CCTL, // cache control
   OP_LAST
};
161
// Various instruction-specific modifier definitions stored in
// Instruction::subOp. The same numeric value is reused by unrelated opcodes,
// so a subOp is only meaningful together with the instruction's operation.
// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
//
// All function-like macros below parenthesize their arguments so that
// expression arguments (e.g. "a | b", "x + 1") expand with the intended
// precedence.
#define NV50_IR_SUBOP_MUL_HIGH     1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL       1
#define NV50_IR_SUBOP_LDC_IS       2
#define NV50_IR_SUBOP_LDC_ISL      3
#define NV50_IR_SUBOP_SHIFT_WRAP   1
#define NV50_IR_SUBOP_EMU_PRERET   1
#define NV50_IR_SUBOP_TEXBAR(n)    (n)
#define NV50_IR_SUBOP_MOV_FINAL    1
#define NV50_IR_SUBOP_EXTBF_REV    1
#define NV50_IR_SUBOP_PERMT_F4E    1
#define NV50_IR_SUBOP_PERMT_B4E    2
#define NV50_IR_SUBOP_PERMT_RC8    3
#define NV50_IR_SUBOP_PERMT_ECL    4
#define NV50_IR_SUBOP_PERMT_ECR    5
#define NV50_IR_SUBOP_PERMT_RC16   6
#define NV50_IR_SUBOP_BAR_SYNC     0
#define NV50_IR_SUBOP_BAR_ARRIVE   1
#define NV50_IR_SUBOP_BAR_RED_AND  2
#define NV50_IR_SUBOP_BAR_RED_OR   3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR subOp layout: bits [1:0] hold the direction (L/S/M), the bits above
// hold the scope (CTA/GL/SYS); see the _DIR and _SCOPE extractors.
#define NV50_IR_SUBOP_MEMBAR_L     1
#define NV50_IR_SUBOP_MEMBAR_S     2
#define NV50_IR_SUBOP_MEMBAR_M     3
#define NV50_IR_SUBOP_MEMBAR_CTA  (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL   (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS  (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m)   ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
// d and s are name suffixes (token-pasted), e.g. NV50_IR_SUBOP_MEMBAR(L, GL)
#define NV50_IR_SUBOP_MEMBAR(d,s) \
   (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
#define NV50_IR_SUBOP_ATOM_ADD      0
#define NV50_IR_SUBOP_ATOM_MIN      1
#define NV50_IR_SUBOP_ATOM_MAX      2
#define NV50_IR_SUBOP_ATOM_INC      3
#define NV50_IR_SUBOP_ATOM_DEC      4
#define NV50_IR_SUBOP_ATOM_AND      5
#define NV50_IR_SUBOP_ATOM_OR       6
#define NV50_IR_SUBOP_ATOM_XOR      7
#define NV50_IR_SUBOP_ATOM_CAS      8
#define NV50_IR_SUBOP_ATOM_EXCH     9
#define NV50_IR_SUBOP_CCTL_IV       5
#define NV50_IR_SUBOP_CCTL_IVALL    6
#define NV50_IR_SUBOP_SUST_IGN      0
#define NV50_IR_SUBOP_SUST_TRAP     1
#define NV50_IR_SUBOP_SUST_SDCL     3
#define NV50_IR_SUBOP_SULD_ZERO     0
#define NV50_IR_SUBOP_SULD_TRAP     1
#define NV50_IR_SUBOP_SULD_SDCL     3
#define NV50_IR_SUBOP_SUBFM_3D      1
#define NV50_IR_SUBOP_SUCLAMP_2D    0x10
// SUCLAMP variants: a per-variant base (0, 5, 10) plus r, with the 2D flag
// (0x10) or'ed in when d equals 2.
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_PIXLD_COUNT       0
#define NV50_IR_SUBOP_PIXLD_COVMASK     1
#define NV50_IR_SUBOP_PIXLD_COVERED     2
#define NV50_IR_SUBOP_PIXLD_OFFSET      3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID    5
#define NV50_IR_SUBOP_MADSP_SD     0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
// Vector op encodings: a in bits [4:0], b from bit 5, d from bit 10; bits
// [15:14] distinguish V1/V2/V4 and are what NV50_IR_SUBOP_Vn() extracts.
#define NV50_IR_SUBOP_V1(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n)        ((n) >> 14)
234
// Data types of instruction operands and results (see Instruction::dType and
// Instruction::sType, and Storage::type).
// NOTE(review): the numeric suffix presumably gives the width in bits, with
// U/S/F/B meaning unsigned/signed/float/raw bits — confirm against users.
enum DataType
{
   TYPE_NONE,
   TYPE_U8,
   TYPE_S8,
   TYPE_U16,
   TYPE_S16,
   TYPE_U32,
   TYPE_S32,
   TYPE_U64, // 64 bit operations are only lowered after register allocation
   TYPE_S64,
   TYPE_F16,
   TYPE_F32,
   TYPE_F64,
   TYPE_B96,
   TYPE_B128
};
252
// Condition codes (see Instruction::cc and CmpInstruction::setCond).
// Several enumerators are deliberate aliases of one value; which name applies
// depends on the register file being tested, as noted per line.
enum CondCode
{
   CC_FL = 0,
   CC_NEVER = CC_FL, // when used with FILE_FLAGS
   CC_LT = 1,
   CC_EQ = 2,
   CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
   CC_LE = 3,
   CC_GT = 4,
   CC_NE = 5,
   CC_P = CC_NE,
   CC_GE = 6,
   CC_TR = 7,
   CC_ALWAYS = CC_TR,
   // values 8..14 are the codes 0..6 above with bit 3 set
   // NOTE(review): presumably the "unordered" (NaN-accepting) variants — confirm
   CC_U = 8,
   CC_LTU = 9,
   CC_EQU = 10,
   CC_LEU = 11,
   CC_GTU = 12,
   CC_NEU = 13,
   CC_GEU = 14,
   // values from 0x10 up
   // NOTE(review): presumably tests on the overflow/carry/sign/... flag bits — confirm
   CC_NO = 0x10,
   CC_NC = 0x11,
   CC_NS = 0x12,
   CC_NA = 0x13,
   CC_A = 0x14,
   CC_S = 0x15,
   CC_C = 0x16,
   CC_O = 0x17
};
283
// Rounding modes (see Instruction::rnd). The second group of four mirrors
// the first, but rounds to an integer value.
enum RoundMode
{
   ROUND_N  = 0, // nearest
   ROUND_M  = 1, // towards -inf
   ROUND_Z  = 2, // towards 0
   ROUND_P  = 3, // towards +inf

   ROUND_NI = 4, // nearest integer
   ROUND_MI = 5, // to integer towards -inf
   ROUND_ZI = 6, // to integer towards 0
   ROUND_PI = 7  // to integer towards +inf
};
295
// Cache modes (see Instruction::cache). CACHE_WB and CACHE_WT are aliases of
// CACHE_CA and CACHE_CV respectively.
enum CacheMode
{
   CACHE_CA = 0,        // cache at all levels
   CACHE_WB = CACHE_CA, // cache write back
   CACHE_CG = 1,        // cache at global level
   CACHE_CS = 2,        // cache streaming
   CACHE_CV = 3,        // cache as volatile
   CACHE_WT = CACHE_CV  // cache write-through
};
305
// Register files and address spaces a Value can live in (see Storage::file).
// Enumerators up to and including LAST_REGISTER_FILE are the register files;
// DATA_FILE_COUNT is the final enumerator and so equals the number of files.
enum DataFile
{
   FILE_NULL = 0,
   FILE_GPR,
   FILE_PREDICATE, // boolean predicate
   FILE_FLAGS, // zero/sign/carry/overflow bits
   FILE_ADDRESS,
   LAST_REGISTER_FILE = FILE_ADDRESS, // alias, does not consume a value
   FILE_IMMEDIATE,
   FILE_MEMORY_CONST,
   FILE_SHADER_INPUT,
   FILE_SHADER_OUTPUT,
   FILE_MEMORY_GLOBAL,
   FILE_MEMORY_SHARED,
   FILE_MEMORY_LOCAL,
   FILE_SYSTEM_VALUE,
   DATA_FILE_COUNT
};
324
// Texture target kinds. The value is used directly as an index into
// TexInstruction::Target::descTable, which has TEX_TARGET_COUNT entries, so
// the order here must stay in sync with that table.
enum TexTarget
{
   TEX_TARGET_1D,
   TEX_TARGET_2D,
   TEX_TARGET_2D_MS,
   TEX_TARGET_3D,
   TEX_TARGET_CUBE,
   TEX_TARGET_1D_SHADOW,
   TEX_TARGET_2D_SHADOW,
   TEX_TARGET_CUBE_SHADOW,
   TEX_TARGET_1D_ARRAY,
   TEX_TARGET_2D_ARRAY,
   TEX_TARGET_2D_MS_ARRAY,
   TEX_TARGET_CUBE_ARRAY,
   TEX_TARGET_1D_ARRAY_SHADOW,
   TEX_TARGET_2D_ARRAY_SHADOW,
   TEX_TARGET_RECT,
   TEX_TARGET_RECT_SHADOW,
   TEX_TARGET_CUBE_ARRAY_SHADOW,
   TEX_TARGET_BUFFER,
   TEX_TARGET_COUNT // number of targets, not a target itself
};
347
// System value semantics (stored in Storage::data.sv.sv, set through
// Symbol::setSV, accessed by OP_RDSV / OP_WRSV).
// NOTE(review): SV_TID..SV_NCTAID and the entries after them look like
// compute-related values (thread/CTA ids etc.) — confirm against users.
enum SVSemantic
{
   SV_POSITION, // WPOS
   SV_VERTEX_ID,
   SV_INSTANCE_ID,
   SV_INVOCATION_ID,
   SV_PRIMITIVE_ID,
   SV_VERTEX_COUNT, // gl_PatchVerticesIn
   SV_LAYER,
   SV_VIEWPORT_INDEX,
   SV_YDIR,
   SV_FACE,
   SV_POINT_SIZE,
   SV_POINT_COORD,
   SV_CLIP_DISTANCE,
   SV_SAMPLE_INDEX,
   SV_SAMPLE_POS,
   SV_TESS_FACTOR,
   SV_TESS_COORD,
   SV_TID,
   SV_CTAID,
   SV_NTID,
   SV_GRIDID,
   SV_NCTAID,
   SV_LANEID,
   SV_PHYSID,
   SV_NPHYSID,
   SV_CLOCK,
   SV_LBASE,
   SV_SBASE,
   SV_VERTEX_STRIDE,
   SV_UNDEFINED,
   SV_LAST
};
382
// Forward declarations of the IR classes defined further down in this file.
// Target is only forward-declared here; its definition is not in this file.

// containers
class Program;
class Function;
class BasicBlock;

class Target;

// instructions
class Instruction;
class CmpInstruction;
class TexInstruction;
class FlowInstruction;

// values
class Value;
class LValue;
class Symbol;
class ImmediateValue;
398
// Describes where a Value is stored (file, size, type) and carries its
// payload: the literal for immediates, an offset or register id otherwise.
// Value::reg is of this type.
struct Storage
{
   DataFile file;
   int8_t fileIndex; // signed, may be indirect for CONST[]
   uint8_t size; // this should match the Instruction type's size
   DataType type; // mainly for pretty printing
   // Which union member is valid is not recorded here.
   // NOTE(review): presumably selected by 'file' and 'type' — confirm.
   union {
      uint64_t u64;    // immediate values
      uint32_t u32;
      uint16_t u16;
      uint8_t u8;
      int64_t s64;
      int32_t s32;
      int16_t s16;
      int8_t s8;
      float f32;
      double f64;
      int32_t offset; // offset from 0 (base of address space)
      int32_t id;     // register id (< 0 if virtual/unassigned, in units <= 4)
      struct {
         SVSemantic sv;
         int index;
      } sv; // system value semantic and index (see Symbol::setSV)
   } data;
};
424
// Source modifier bits, combined in Modifier::bits.
// precedence: NOT after SAT after NEG after ABS
#define NV50_IR_MOD_ABS (1 << 0)
#define NV50_IR_MOD_NEG (1 << 1)
#define NV50_IR_MOD_SAT (1 << 2)
#define NV50_IR_MOD_NOT (1 << 3)
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)

// Interpolation settings as held in Instruction::ipa (see
// Instruction::setInterpolate): bits [1:0] select the interpolation mode,
// bits [3:2] select the sample location.
#define NV50_IR_INTERP_MODE_MASK   0x3
#define NV50_IR_INTERP_LINEAR      (0 << 0)
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
#define NV50_IR_INTERP_FLAT        (2 << 0)
#define NV50_IR_INTERP_SC          (3 << 0) // what exactly is that ?
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
#define NV50_IR_INTERP_DEFAULT     (0 << 2)
#define NV50_IR_INTERP_CENTROID    (1 << 2)
#define NV50_IR_INTERP_OFFSET      (2 << 2)
#define NV50_IR_INTERP_SAMPLEID    (3 << 2)
442
443 // do we really want this to be a class ?
444 class Modifier
445 {
446 public:
447 Modifier() : bits(0) { }
448 Modifier(unsigned int m) : bits(m) { }
449 Modifier(operation op);
450
451 // @return new Modifier applying a after b (asserts if unrepresentable)
452 Modifier operator*(const Modifier) const;
453 Modifier operator*=(const Modifier m) { *this = *this * m; return *this; }
454 Modifier operator==(const Modifier m) const { return m.bits == bits; }
455 Modifier operator!=(const Modifier m) const { return m.bits != bits; }
456
457 inline Modifier operator&(const Modifier m) const { return bits & m.bits; }
458 inline Modifier operator|(const Modifier m) const { return bits | m.bits; }
459 inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }
460
461 operation getOp() const;
462
463 inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }
464 inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }
465
466 inline operator bool() const { return bits ? true : false; }
467
468 void applyTo(ImmediateValue &imm) const;
469
470 int print(char *buf, size_t size) const;
471
472 private:
473 uint8_t bits;
474 };
475
// A use of a Value as a source operand: the referenced value, the
// instruction owning the reference, and per-use data (modifier, indirect
// addressing, swizzle).
class ValueRef
{
public:
   ValueRef(Value * = NULL);
   ValueRef(const ValueRef&);
   ~ValueRef();

   // true if this reference points at a value
   inline bool exists() const { return value != NULL; }

   void set(Value *);
   void set(const ValueRef&);
   inline Value *get() const { return value; }
   inline Value *rep() const;

   // instruction this reference belongs to
   inline Instruction *getInsn() const { return insn; }
   inline void setInsn(Instruction *inst) { insn = inst; }

   // dim indexes indirect[], which has 2 entries
   inline bool isIndirect(int dim) const { return indirect[dim] >= 0; }
   inline const ValueRef *getIndirect(int dim) const;

   inline DataFile getFile() const;
   inline unsigned getSize() const;

   // SSA: return eventual (traverse MOVs) literal value, if it exists
   bool getImmediate(ImmediateValue&) const;

public:
   Modifier mod;
   int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])
   uint8_t swizzle;

   bool usedAsPtr; // for printing

private:
   Value *value;
   Instruction *insn;
};
513
// A definition (destination operand) of a Value: the defined value, the
// value it had before SSA conversion, and the defining instruction.
class ValueDef
{
public:
   ValueDef(Value * = NULL);
   ValueDef(const ValueDef&);
   ~ValueDef();

   // true if this definition points at a value
   inline bool exists() const { return value != NULL; }

   inline Value *get() const { return value; }
   inline Value *rep() const;
   void set(Value *);
   bool mayReplace(const ValueRef &);
   void replace(const ValueRef &, bool doSet); // replace all uses of the old value

   // instruction this definition belongs to
   inline Instruction *getInsn() const { return insn; }
   inline void setInsn(Instruction *inst) { insn = inst; }

   inline DataFile getFile() const;
   inline unsigned getSize() const;

   inline void setSSA(LValue *);
   inline const LValue *preSSA() const;

private:
   Value *value;   // should make this LValue * ...
   LValue *origin; // pre SSA value
   Instruction *insn;
};
543
544 class Value
545 {
546 public:
547 Value();
548 virtual ~Value() { }
549
550 virtual Value *clone(ClonePolicy<Function>&) const = 0;
551
552 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0;
553
554 virtual bool equals(const Value *, bool strict = false) const;
555 virtual bool interfers(const Value *) const;
556 virtual bool isUniform() const { return true; }
557
558 inline Value *rep() const { return join; }
559
560 inline Instruction *getUniqueInsn() const;
561 inline Instruction *getInsn() const; // use when uniqueness is certain
562
563 inline int refCount() { return uses.size(); }
564
565 inline LValue *asLValue();
566 inline Symbol *asSym();
567 inline ImmediateValue *asImm();
568 inline const Symbol *asSym() const;
569 inline const ImmediateValue *asImm() const;
570
571 inline bool inFile(DataFile f) { return reg.file == f; }
572
573 static inline Value *get(Iterator&);
574
575 std::list<ValueRef *> uses;
576 std::list<ValueDef *> defs;
577 typedef std::list<ValueRef *>::iterator UseIterator;
578 typedef std::list<ValueRef *>::const_iterator UseCIterator;
579 typedef std::list<ValueDef *>::iterator DefIterator;
580 typedef std::list<ValueDef *>::const_iterator DefCIterator;
581
582 int id;
583 Storage reg;
584
585 // TODO: these should be in LValue:
586 Interval livei;
587 Value *join;
588 };
589
// A Value created per Function in a given DataFile, carrying the extra flag
// bits used by SSA conversion and register allocation (RA).
class LValue : public Value
{
public:
   LValue(Function *, DataFile file);
   LValue(Function *, LValue *);
   ~LValue() { }

   virtual bool isUniform() const;

   virtual LValue *clone(ClonePolicy<Function>&) const;

   virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;

public:
   unsigned compMask : 8; // compound/component mask
   unsigned compound : 1; // used by RA, value involved in split/merge
   unsigned ssa      : 1;
   unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)
   unsigned noSpill  : 1; // do not spill (e.g. if spill temporary already)
};
610
// A Value naming a location in a (by default constant) memory file, or a
// system value; may refer to a base Symbol when it represents an array
// element.
class Symbol : public Value
{
public:
   Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);
   ~Symbol() { }

   virtual Symbol *clone(ClonePolicy<Function>&) const;

   // NOTE: no default for 'strict' here, unlike Value::equals
   virtual bool equals(const Value *that, bool strict) const;

   virtual bool isUniform() const;

   virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;

   // print with indirect values
   int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;

   // overwrite the file and file index stored in 'reg'
   inline void setFile(DataFile file, ubyte fileIndex = 0)
   {
      reg.file = file;
      reg.fileIndex = fileIndex;
   }

   inline void setOffset(int32_t offset);
   inline void setAddress(Symbol *base, int32_t offset);
   inline void setSV(SVSemantic sv, uint32_t idx = 0);

   inline const Symbol *getBase() const { return baseSym; }

private:
   Symbol *baseSym; // array base for Symbols representing array elements
};
643
644 class ImmediateValue : public Value
645 {
646 public:
647 ImmediateValue() { }
648 ImmediateValue(Program *, uint32_t);
649 ImmediateValue(Program *, float);
650 ImmediateValue(Program *, double);
651 // NOTE: not added to program with
652 ImmediateValue(const ImmediateValue *, DataType ty);
653 ~ImmediateValue() { };
654
655 virtual ImmediateValue *clone(ClonePolicy<Function>&) const;
656
657 virtual bool equals(const Value *that, bool strict) const;
658
659 // these only work if 'type' is valid (we mostly use untyped literals):
660 bool isInteger(const int ival) const; // ival is cast to this' type
661 bool isNegative() const;
662 bool isPow2() const;
663
664 void applyLog2();
665
666 // for constant folding:
667 ImmediateValue operator+(const ImmediateValue&) const;
668 ImmediateValue operator-(const ImmediateValue&) const;
669 ImmediateValue operator*(const ImmediateValue&) const;
670 ImmediateValue operator/(const ImmediateValue&) const;
671
672 ImmediateValue& operator=(const ImmediateValue&); // only sets value !
673
674 bool compare(CondCode cc, float fval) const;
675
676 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;
677 };
678
679 class Instruction
680 {
681 public:
682 Instruction();
683 Instruction(Function *, operation, DataType);
684 virtual ~Instruction();
685
686 virtual Instruction *clone(ClonePolicy<Function>&,
687 Instruction * = NULL) const;
688
689 void setDef(int i, Value *);
690 void setSrc(int s, Value *);
691 void setSrc(int s, const ValueRef&);
692 void swapSources(int a, int b);
693 void moveSources(int s, int delta);
694 bool setIndirect(int s, int dim, Value *);
695
696 inline ValueRef& src(int s) { return srcs[s]; }
697 inline ValueDef& def(int s) { return defs[s]; }
698 inline const ValueRef& src(int s) const { return srcs[s]; }
699 inline const ValueDef& def(int s) const { return defs[s]; }
700
701 inline Value *getDef(int d) const { return defs[d].get(); }
702 inline Value *getSrc(int s) const { return srcs[s].get(); }
703 inline Value *getIndirect(int s, int dim) const;
704
705 inline bool defExists(unsigned d) const
706 {
707 return d < defs.size() && defs[d].exists();
708 }
709 inline bool srcExists(unsigned s) const
710 {
711 return s < srcs.size() && srcs[s].exists();
712 }
713
714 inline bool constrainedDefs() const;
715
716 bool setPredicate(CondCode ccode, Value *);
717 inline Value *getPredicate() const;
718 bool writesPredicate() const;
719 inline bool isPredicated() const { return predSrc >= 0; }
720
721 inline void setFlagsSrc(int s, Value *);
722 inline void setFlagsDef(int d, Value *);
723 inline bool usesFlags() const { return flagsSrc >= 0; }
724
725 unsigned int defCount() const { return defs.size(); };
726 unsigned int defCount(unsigned int mask, bool singleFile = false) const;
727 unsigned int srcCount() const { return srcs.size(); };
728 unsigned int srcCount(unsigned int mask, bool singleFile = false) const;
729
730 // save & remove / set indirect[0,1] and predicate source
731 void takeExtraSources(int s, Value *[3]);
732 void putExtraSources(int s, Value *[3]);
733
734 inline void setType(DataType type) { dType = sType = type; }
735
736 inline void setType(DataType dtype, DataType stype)
737 {
738 dType = dtype;
739 sType = stype;
740 }
741
742 inline bool isPseudo() const { return op < OP_MOV; }
743 bool isDead() const;
744 bool isNop() const;
745 bool isCommutationLegal(const Instruction *) const; // must be adjacent !
746 bool isActionEqual(const Instruction *) const;
747 bool isResultEqual(const Instruction *) const;
748
749 void print() const;
750
751 inline CmpInstruction *asCmp();
752 inline TexInstruction *asTex();
753 inline FlowInstruction *asFlow();
754 inline const TexInstruction *asTex() const;
755 inline const CmpInstruction *asCmp() const;
756 inline const FlowInstruction *asFlow() const;
757
758 public:
759 Instruction *next;
760 Instruction *prev;
761 int id;
762 int serial; // CFG order
763
764 operation op;
765 DataType dType; // destination or defining type
766 DataType sType; // source or secondary type
767 CondCode cc;
768 RoundMode rnd;
769 CacheMode cache;
770
771 uint16_t subOp; // quadop, 1 for mul-high, etc.
772
773 unsigned encSize : 4; // encoding size in bytes
774 unsigned saturate : 1; // to [0.0f, 1.0f]
775 unsigned join : 1; // converge control flow (use OP_JOIN until end)
776 unsigned fixed : 1; // prevent dead code elimination
777 unsigned terminator : 1; // end of basic block
778 unsigned ftz : 1; // flush denormal to zero
779 unsigned dnz : 1; // denormals, NaN are zero
780 unsigned ipa : 4; // interpolation mode
781 unsigned lanes : 4;
782 unsigned perPatch : 1;
783 unsigned exit : 1; // terminate program after insn
784 unsigned mask : 4; // for vector ops
785
786 int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor
787
788 int8_t predSrc;
789 int8_t flagsDef;
790 int8_t flagsSrc;
791
792 uint8_t sched; // scheduling data (NOTE: maybe move to separate storage)
793
794 BasicBlock *bb;
795
796 protected:
797 std::deque<ValueDef> defs; // no gaps !
798 std::deque<ValueRef> srcs; // no gaps !
799
800 // instruction specific methods:
801 // (don't want to subclass, would need more constructors and memory pools)
802 public:
803 inline void setInterpolate(unsigned int mode) { ipa = mode; }
804
805 unsigned int getInterpMode() const { return ipa & 0x3; }
806 unsigned int getSampleMode() const { return ipa & 0xc; }
807
808 private:
809 void init();
810 };
811
// Kinds of texture query (see TexInstruction::tex.query).
enum TexQuery
{
   TXQ_DIMS            = 0,
   TXQ_TYPE            = 1,
   TXQ_SAMPLE_POSITION = 2,
   TXQ_FILTER          = 3,
   TXQ_LOD             = 4,
   TXQ_WRAP            = 5,
   TXQ_BORDER_COLOUR   = 6
};
822
823 class TexInstruction : public Instruction
824 {
825 public:
826 class Target
827 {
828 public:
829 Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { }
830
831 const char *getName() const { return descTable[target].name; }
832 unsigned int getArgCount() const { return descTable[target].argc; }
833 unsigned int getDim() const { return descTable[target].dim; }
834 int isArray() const { return descTable[target].array ? 1 : 0; }
835 int isCube() const { return descTable[target].cube ? 1 : 0; }
836 int isShadow() const { return descTable[target].shadow ? 1 : 0; }
837 int isMS() const {
838 return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }
839 void clearMS() {
840 if (isMS()) {
841 if (isArray())
842 target = TEX_TARGET_2D_ARRAY;
843 else
844 target = TEX_TARGET_2D;
845 }
846 }
847
848 Target& operator=(TexTarget targ)
849 {
850 assert(targ < TEX_TARGET_COUNT);
851 target = targ;
852 return *this;
853 }
854
855 inline bool operator==(TexTarget targ) const { return target == targ; }
856 inline bool operator!=(TexTarget targ) const { return target != targ; }
857
858 enum TexTarget getEnum() const { return target; }
859
860 private:
861 struct Desc
862 {
863 char name[19];
864 uint8_t dim;
865 uint8_t argc;
866 bool array;
867 bool cube;
868 bool shadow;
869 };
870
871 static const struct Desc descTable[TEX_TARGET_COUNT];
872
873 private:
874 enum TexTarget target;
875 };
876
877 public:
878 TexInstruction(Function *, operation);
879 virtual ~TexInstruction();
880
881 virtual TexInstruction *clone(ClonePolicy<Function>&,
882 Instruction * = NULL) const;
883
884 inline void setTexture(Target targ, uint8_t r, uint8_t s)
885 {
886 tex.r = r;
887 tex.s = s;
888 tex.target = targ;
889 }
890
891 void setIndirectR(Value *);
892 void setIndirectS(Value *);
893 inline Value *getIndirectR() const;
894 inline Value *getIndirectS() const;
895
896 public:
897 struct {
898 Target target;
899
900 uint16_t r;
901 uint16_t s;
902 int8_t rIndirectSrc;
903 int8_t sIndirectSrc;
904
905 uint8_t mask;
906 uint8_t gatherComp;
907
908 bool liveOnly; // only execute on live pixels of a quad (optimization)
909 bool levelZero;
910 bool derivAll;
911
912 int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
913 int8_t offset[4][3];
914
915 enum TexQuery query;
916 } tex;
917
918 ValueRef dPdx[3];
919 ValueRef dPdy[3];
920 };
921
// Comparison instruction: an Instruction that additionally stores the
// condition code of the comparison in setCond.
class CmpInstruction : public Instruction
{
public:
   CmpInstruction(Function *, operation);

   virtual CmpInstruction *clone(ClonePolicy<Function>&,
                                 Instruction * = NULL) const;

   // accessors for setCond
   void setCondition(CondCode cond) { setCond = cond; }
   CondCode getCondition() const { return setCond; }

public:
   CondCode setCond;
};
936
// Control flow instruction: an Instruction with a jump/call target, which is
// a basic block, a builtin id or a function.
class FlowInstruction : public Instruction
{
public:
   FlowInstruction(Function *, operation, void *target);

   virtual FlowInstruction *clone(ClonePolicy<Function>&,
                                  Instruction * = NULL) const;

public:
   unsigned allWarp  : 1;
   unsigned absolute : 1;
   unsigned limit    : 1;
   unsigned builtin  : 1; // true for calls to emulation code
   unsigned indirect : 1; // target in src(0)

   // NOTE(review): which member is valid presumably follows from the opcode
   // and the 'builtin' flag above — confirm against the constructor
   union {
      BasicBlock *bb;
      int builtin;
      Function *fn;
   } target;
};
958
// A basic block: a list of instructions (phi instructions first, then the
// rest) plus its nodes in the control flow graph and the dominator tree.
class BasicBlock
{
public:
   BasicBlock(Function *);
   ~BasicBlock();

   BasicBlock *clone(ClonePolicy<Function>&) const;

   inline int getId() const { return id; }
   inline unsigned int getInsnCount() const { return numInsns; }
   // true if the block has a last instruction and it is flagged 'terminator'
   inline bool isTerminated() const { return exit && exit->terminator; }

   bool dominatedBy(BasicBlock *bb);
   inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);

   // returns mask of conditional out blocks
   // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF
   unsigned int initiatesSimpleConditional() const;

public:
   Function *getFunction() const { return func; }
   Program *getProgram() const { return program; }

   Instruction *getEntry() const { return entry; } // first non-phi instruction
   Instruction *getPhi() const { return phi; }
   // first instruction overall: the first phi if there is one, else entry
   Instruction *getFirst() const { return phi ? phi : entry; }
   Instruction *getExit() const { return exit; }

   void insertHead(Instruction *);
   void insertTail(Instruction *);
   void insertBefore(Instruction *, Instruction *);
   void insertAfter(Instruction *, Instruction *);
   void remove(Instruction *);
   void permuteAdjacent(Instruction *, Instruction *);

   BasicBlock *idom() const;

   // NOTE: currently does not rebuild the dominator tree
   BasicBlock *splitBefore(Instruction *, bool attach = true);
   BasicBlock *splitAfter(Instruction *, bool attach = true);

   // NOTE(review): 'df' presumably is the dominance frontier — confirm
   DLList& getDF() { return df; }
   DLList::Iterator iterDF() { return df.iterator(); }

   static inline BasicBlock *get(Iterator&);
   static inline BasicBlock *get(Graph::Node *);

public:
   Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)
   Graph::Node dom;

   BitSet liveSet;
   BitSet defSet;

   uint32_t binPos;
   uint32_t binSize;

   Instruction *joinAt; // for quick reference

   bool explicitCont; // loop headers: true if loop contains continue stmts

private:
   int id;
   DLList df;

   Instruction *phi;
   Instruction *entry;
   Instruction *exit;

   unsigned int numInsns;

private:
   Function *func;
   Program *program;

   void splitCommon(Instruction *, BasicBlock *, bool attach);
};
1036
// A function of a Program: owns the control flow graph, the lists of all
// basic blocks, instructions and LValues created for it, and per-function
// code generation state.
class Function
{
public:
   Function(Program *, const char *name, uint32_t label);
   ~Function();

   static inline Function *get(Graph::Node *node);

   inline Program *getProgram() const { return prog; }
   inline const char *getName() const { return name; }
   inline int getId() const { return id; }
   inline uint32_t getLabel() const { return label; }

   void print();
   void printLiveIntervals() const;
   void printCFGraph(const char *filePath);

   bool setEntry(BasicBlock *);
   bool setExit(BasicBlock *);

   unsigned int orderInstructions(ArrayList&);

   // register an object in the matching ArrayList; 'id' is passed by
   // reference to ArrayList::insert
   // NOTE(review): presumably receives the assigned id — confirm
   inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); }
   inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }
   inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }

   inline LValue *getLValue(int id);

   void buildLiveSets();
   void buildDefSets();
   bool convertToSSA();

public:
   std::deque<ValueDef> ins;
   std::deque<ValueRef> outs;
   std::deque<Value *> clobbers;

   Graph cfg;
   Graph::Node *cfgExit;
   Graph *domTree;
   Graph::Node call; // node in the call graph

   BasicBlock **bbArray; // BBs in emission order
   int bbCount;

   unsigned int loopNestingBound;
   int regClobberMax;

   uint32_t binPos;
   uint32_t binSize;

   Value *stackPtr;

   uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)
   uint32_t tlsSize;

   ArrayList allBBlocks;
   ArrayList allInsns;
   ArrayList allLValues;

private:
   void buildLiveSetsPreSSA(BasicBlock *, const int sequence);
   void buildDefSetsPreSSA(BasicBlock *bb, const int seq);

private:
   uint32_t label;
   int id;
   const char *const name;
   Program *prog;
};
1107
// Code generation stages, in the order they are reached.
enum CGStage
{
   CG_STAGE_PRE_SSA = 0,
   CG_STAGE_SSA     = 1, // expected directly before register allocation
   CG_STAGE_POST_RA = 2
};
1114
// A whole shader/compute program: its functions, the values registered with
// it, the memory pools used for IR objects, and the generated binary. The
// member functions below are the steps from source conversion to emission.
class Program
{
public:
   // kind of program (shader stage or compute)
   enum Type
   {
      TYPE_VERTEX,
      TYPE_TESSELLATION_CONTROL,
      TYPE_TESSELLATION_EVAL,
      TYPE_GEOMETRY,
      TYPE_FRAGMENT,
      TYPE_COMPUTE
   };

   Program(Type type, Target *targ);
   ~Program();

   void print();

   Type getType() const { return progType; }

   // registration of functions and values in the lists below; note that
   // del() only uses 'id', the 'fn' argument is ignored
   inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }
   inline void del(Function *fn, int& id) { allFuncs.remove(id); }
   inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }

   bool makeFromTGSI(struct nv50_ir_prog_info *);
   bool makeFromSM4(struct nv50_ir_prog_info *);
   bool convertToSSA();
   bool optimizeSSA(int level);
   bool optimizePostRA(int level);
   bool registerAllocation();
   bool emitBinary(struct nv50_ir_prog_info *);

   const Target *getTarget() const { return target; }

private:
   void emitSymbolTable(struct nv50_ir_prog_info *);

   Type progType;
   Target *target;

public:
   Function *main;
   Graph calls;

   ArrayList allFuncs;
   ArrayList allRValues;

   uint32_t *code;
   uint32_t binSize;
   uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL

   int maxGPR;

   // one memory pool per IR object class
   MemoryPool mem_Instruction;
   MemoryPool mem_CmpInstruction;
   MemoryPool mem_TexInstruction;
   MemoryPool mem_FlowInstruction;
   MemoryPool mem_LValue;
   MemoryPool mem_Symbol;
   MemoryPool mem_ImmediateValue;

   uint32_t dbgFlags;
   uint8_t optLevel;

   void *targetPriv; // e.g. to carry information between passes

   const struct nv50_ir_prog_info *driver; // for driver configuration

   void releaseInstruction(Instruction *);
   void releaseValue(Value *);
};
1186
1187 // TODO: add const version
1188 class Pass
1189 {
1190 public:
1191 bool run(Program *, bool ordered = false, bool skipPhi = false);
1192 bool run(Function *, bool ordered = false, bool skipPhi = false);
1193
1194 private:
1195 // return false to continue with next entity on next higher level
1196 virtual bool visit(Function *) { return true; }
1197 virtual bool visit(BasicBlock *) { return true; }
1198 virtual bool visit(Instruction *) { return false; }
1199
1200 bool doRun(Program *, bool ordered, bool skipPhi);
1201 bool doRun(Function *, bool ordered, bool skipPhi);
1202
1203 protected:
1204 bool err;
1205 Function *func;
1206 Program *prog;
1207 };
1208
1209 // =============================================================================
1210
1211 #include "codegen/nv50_ir_inlines.h"
1212
1213 } // namespace nv50_ir
1214
1215 #endif // __NV50_IR_H__