nvc0: try to fix register conflicts for vector instructions
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc.h
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #ifndef __NVC0_COMPILER_H__
24 #define __NVC0_COMPILER_H__
25
26 #include <stdio.h>
27
/*
 * Debug trace macro.  Forwards its arguments to debug_printf() when
 * NOUVEAU_DEBUG is defined and expands to nothing otherwise.  It is only
 * defined here if no earlier header already provided NOUVEAU_DBG.
 *
 * The expansion carries no trailing semicolon: the caller's own ';' ends
 * the statement, which keeps the macro usable in an unbraced if/else in
 * both configurations.
 */
#ifndef NOUVEAU_DBG
#ifdef NOUVEAU_DEBUG
# define NOUVEAU_DBG(args...) debug_printf(args)
#else
# define NOUVEAU_DBG(args...)
#endif
#endif
35
/*
 * Error report macro: prints "<function>:<line> - " followed by the
 * printf-style message to stderr.  It is only defined here if no earlier
 * header already provided NOUVEAU_ERR.
 *
 * The expansion is a single expression without a trailing semicolon, so
 * "if (x) NOUVEAU_ERR(...); else ..." parses as intended.
 */
#ifndef NOUVEAU_ERR
#define NOUVEAU_ERR(fmt, args...) \
   fprintf(stderr, "%s:%d - " fmt, __FUNCTION__, __LINE__, ##args)
#endif
40
41 #include "pipe/p_defines.h"
42 #include "util/u_inlines.h"
43 #include "util/u_memory.h"
44 #include "util/u_double_list.h"
45
/*
 * Pseudo opcodes.
 *
 * BIND forces source operand i into the same register as destination
 * operand i, and the operands are assigned consecutive registers (needed
 * for TEX).  Beware of conflicts!
 *
 * SELECT forces all of its source operands and its destination operand
 * into one and the same register.
 */
#define NV_OP_UNDEF      0
#define NV_OP_BIND       1
#define NV_OP_MERGE      2
#define NV_OP_PHI        3
#define NV_OP_SELECT     4
#define NV_OP_NOP        5
61
/* Base opcodes: numbered consecutively after the pseudo opcodes. */
#define NV_OP_LD         6
#define NV_OP_ST         7
#define NV_OP_MOV        8
#define NV_OP_AND        9
#define NV_OP_OR        10
#define NV_OP_XOR       11
#define NV_OP_SHL       12
#define NV_OP_SHR       13
#define NV_OP_NOT       14
#define NV_OP_SET       15
#define NV_OP_ADD       16
#define NV_OP_SUB       17
#define NV_OP_MUL       18
#define NV_OP_MAD       19
#define NV_OP_ABS       20
#define NV_OP_NEG       21
#define NV_OP_MAX       22
#define NV_OP_MIN       23
#define NV_OP_CVT       24
#define NV_OP_CEIL      25
#define NV_OP_FLOOR     26
#define NV_OP_TRUNC     27
#define NV_OP_SAD       28
86
/*
 * Shader opcodes.  The texture opcodes TEX..TXQ are kept contiguous;
 * nv_is_texture_op() tests membership with a range comparison.
 */
#define NV_OP_VFETCH    29
#define NV_OP_PFETCH    30
#define NV_OP_EXPORT    31
#define NV_OP_LINTERP   32
#define NV_OP_PINTERP   33
#define NV_OP_EMIT      34
#define NV_OP_RESTART   35
#define NV_OP_TEX       36
#define NV_OP_TXB       37
#define NV_OP_TXL       38
#define NV_OP_TXF       39
#define NV_OP_TXQ       40
#define NV_OP_QUADOP    41
#define NV_OP_DFDX      42
#define NV_OP_DFDY      43
#define NV_OP_KIL       44
104
/* Control flow opcodes. */
#define NV_OP_BRA       45
#define NV_OP_CALL      46
#define NV_OP_RET       47
#define NV_OP_EXIT      48
#define NV_OP_BREAK     49
#define NV_OP_BREAKADDR 50
#define NV_OP_JOINAT    51
#define NV_OP_JOIN      52
114
/*
 * Typed opcodes.  The *_F32 names are aliases of the corresponding base
 * opcode; the other typed variants get opcode numbers of their own.
 */
#define NV_OP_ADD_F32   NV_OP_ADD
#define NV_OP_ADD_B32   53
#define NV_OP_MUL_F32   NV_OP_MUL
#define NV_OP_MUL_B32   54
#define NV_OP_ABS_F32   NV_OP_ABS
#define NV_OP_ABS_S32   55
#define NV_OP_NEG_F32   NV_OP_NEG
#define NV_OP_NEG_S32   56
#define NV_OP_MAX_F32   NV_OP_MAX
#define NV_OP_MAX_S32   57
#define NV_OP_MAX_U32   58
#define NV_OP_MIN_F32   NV_OP_MIN
#define NV_OP_MIN_S32   59
#define NV_OP_MIN_U32   60
#define NV_OP_SET_F32   61
#define NV_OP_SET_S32   62
#define NV_OP_SET_U32   63
#define NV_OP_SAR       64
#define NV_OP_RCP       65
#define NV_OP_RSQ       66
#define NV_OP_LG2       67
#define NV_OP_SIN       68
#define NV_OP_COS       69
#define NV_OP_EX2       70
#define NV_OP_PRESIN    71
#define NV_OP_PREEX2    72
#define NV_OP_SAT       73
143
/* Newly added opcodes; the *_F32 names alias the untyped opcode. */
#define NV_OP_SET_F32_AND 74
#define NV_OP_SET_F32_OR  75
#define NV_OP_SET_F32_XOR 76
#define NV_OP_SELP        77
#define NV_OP_SLCT        78
#define NV_OP_SLCT_F32    NV_OP_SLCT
#define NV_OP_SLCT_S32    79
#define NV_OP_SLCT_U32    80
#define NV_OP_SUB_F32     NV_OP_SUB
#define NV_OP_SUB_S32     81
#define NV_OP_MAD_F32     NV_OP_MAD
#define NV_OP_FSET_F32    82
#define NV_OP_TXG         83

/* One past the highest opcode number defined above. */
#define NV_OP_COUNT       84
160
/*
 * Register / memory file identifiers (nv50 files omitted).
 * Every file id at or above NV_FILE_MEM_S is treated as a memory file.
 */
#define NV_FILE_GPR   0
#define NV_FILE_COND  1
#define NV_FILE_PRED  2
#define NV_FILE_IMM   16
#define NV_FILE_MEM_S 32
#define NV_FILE_MEM_V 34
#define NV_FILE_MEM_A 35
#define NV_FILE_MEM_L 48
#define NV_FILE_MEM_G 64
/* Argument is parenthesised so that expression arguments bind correctly. */
#define NV_FILE_MEM_C(i) (80 + (i))

#define NV_IS_MEMORY_FILE(f) ((f) >= NV_FILE_MEM_S)
174
/* Operand modifier flags; each one is a distinct bit so they can be OR-ed. */
#define NV_MOD_NEG (1 << 0)
#define NV_MOD_ABS (1 << 1)
#define NV_MOD_NOT (1 << 2)
#define NV_MOD_SAT (1 << 3)
179
/*
 * Value type codes.  The low nibble selects the scalar type; NV_TYPE_VEC
 * places a component count in the high nibble.
 */
#define NV_TYPE_U8  0x00
#define NV_TYPE_S8  0x01
#define NV_TYPE_U16 0x02
#define NV_TYPE_S16 0x03
#define NV_TYPE_U32 0x04
#define NV_TYPE_S32 0x05
#define NV_TYPE_P32 0x07
#define NV_TYPE_F32 0x09
#define NV_TYPE_F64 0x0b
/*
 * x is pasted onto NV_TYPE_, so it must be a bare type suffix (e.g. F32).
 * n is parenthesised so that expression arguments shift as a whole.
 */
#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | ((n) << 4))
#define NV_TYPE_ANY 0xff

/* True for type codes below 7, i.e. the U8..S32 codes above. */
#define NV_TYPE_ISINT(t) ((t) < 7)
/* Tests bit 0 of the type code. */
#define NV_TYPE_ISSGD(t) ((t) & 1)
194
/* Condition codes (the cc field of an instruction is 5 bits wide). */
#define NV_CC_FL 0x00
#define NV_CC_LT 0x01
#define NV_CC_EQ 0x02
#define NV_CC_LE 0x03
#define NV_CC_GT 0x04
#define NV_CC_NE 0x05
#define NV_CC_GE 0x06
#define NV_CC_U  0x08
#define NV_CC_TR 0x0f
#define NV_CC_O  0x10
#define NV_CC_C  0x11
#define NV_CC_A  0x12
#define NV_CC_S  0x13
208
/* Fixed capacities of the per-program instruction, value and block tables. */
#define NV_PC_MAX_INSTRUCTIONS 2048
#define NV_PC_MAX_VALUES       (NV_PC_MAX_INSTRUCTIONS * 4)

#define NV_PC_MAX_BASIC_BLOCKS 1024
213
214 struct nv_op_info {
215 uint base; /* e.g. ADD_S32 -> ADD */
216 char name[12];
217 uint8_t type;
218 uint8_t mods;
219 unsigned flow : 1;
220 unsigned commutative : 1;
221 unsigned vector : 1;
222 unsigned predicate : 1;
223 unsigned pseudo : 1;
224 unsigned immediate : 3;
225 unsigned memory : 3;
226 };
227
228 extern struct nv_op_info nvc0_op_info_table[];
229
230 #define NV_BASEOP(op) (nvc0_op_info_table[op].base)
231 #define NV_OPTYPE(op) (nvc0_op_info_table[op].type)
232
233 static INLINE uint
234 nv_op_base(uint opcode)
235 {
236 return nvc0_op_info_table[opcode].base;
237 }
238
239 static INLINE boolean
240 nv_is_texture_op(uint opcode)
241 {
242 return (opcode >= NV_OP_TEX && opcode <= NV_OP_TXQ);
243 }
244
245 static INLINE boolean
246 nv_is_vector_op(uint opcode)
247 {
248 return nvc0_op_info_table[opcode].vector ? TRUE : FALSE;
249 }
250
251 static INLINE boolean
252 nv_op_commutative(uint opcode)
253 {
254 return nvc0_op_info_table[opcode].commutative ? TRUE : FALSE;
255 }
256
257 static INLINE uint8_t
258 nv_op_supported_src_mods(uint opcode)
259 {
260 return nvc0_op_info_table[opcode].mods;
261 }
262
263 static INLINE boolean
264 nv_op_predicateable(uint opcode)
265 {
266 return nvc0_op_info_table[opcode].predicate ? TRUE : FALSE;
267 }
268
269 static INLINE uint
270 nv_type_order(ubyte type)
271 {
272 switch (type & 0xf) {
273 case NV_TYPE_U8:
274 case NV_TYPE_S8:
275 return 0;
276 case NV_TYPE_U16:
277 case NV_TYPE_S16:
278 return 1;
279 case NV_TYPE_U32:
280 case NV_TYPE_F32:
281 case NV_TYPE_S32:
282 case NV_TYPE_P32:
283 return 2;
284 case NV_TYPE_F64:
285 return 3;
286 }
287 assert(0);
288 return 0;
289 }
290
291 static INLINE uint
292 nv_type_sizeof(ubyte type)
293 {
294 if (type & 0xf0)
295 return (1 << nv_type_order(type)) * (type >> 4);
296 return 1 << nv_type_order(type);
297 }
298
299 static INLINE uint
300 nv_type_sizeof_base(ubyte type)
301 {
302 return 1 << nv_type_order(type);
303 }
304
305 struct nv_reg {
306 uint32_t address; /* for memory locations */
307 int id; /* for registers */
308 ubyte file;
309 ubyte size;
310 union {
311 int32_t s32;
312 int64_t s64;
313 uint64_t u64;
314 uint32_t u32; /* expected to be 0 for $r63 */
315 float f32;
316 double f64;
317 } imm;
318 };
319
/* Node of a singly linked list of [bgn, end] integer ranges. */
struct nv_range {
   struct nv_range *next; /* following node in the list */
   int bgn;
   int end;
};
325
326 struct nv_ref;
327
328 struct nv_value {
329 struct nv_reg reg;
330 struct nv_instruction *insn;
331 struct nv_value *join;
332 struct nv_ref *last_use;
333 int n;
334 struct nv_range *livei;
335 int refc;
336 struct nv_value *next;
337 struct nv_value *prev;
338 };
339
340 struct nv_ref {
341 struct nv_value *value;
342 struct nv_instruction *insn;
343 struct list_head list; /* connects uses of the same value */
344 uint8_t mod;
345 uint8_t flags;
346 };
347
348 struct nv_basic_block;
349
350 struct nv_instruction {
351 struct nv_instruction *next;
352 struct nv_instruction *prev;
353 uint opcode;
354 uint serial;
355
356 struct nv_value *def[5];
357 struct nv_ref *src[6];
358
359 int8_t predicate; /* index of predicate src */
360 int8_t indirect; /* index of pointer src */
361
362 union {
363 struct {
364 uint8_t t; /* TIC binding */
365 uint8_t s; /* TSC binding */
366 } tex;
367 struct {
368 uint8_t d; /* output type */
369 uint8_t s; /* input type */
370 } cvt;
371 } ext;
372
373 struct nv_basic_block *bb;
374 struct nv_basic_block *target; /* target block of control flow insn */
375
376 unsigned cc : 5; /* condition code */
377 unsigned fixed : 1; /* don't optimize away (prematurely) */
378 unsigned terminator : 1;
379 unsigned join : 1;
380 unsigned set_cond : 4; /* 2nd byte */
381 unsigned saturate : 1;
382 unsigned centroid : 1;
383 unsigned flat : 1;
384 unsigned patch : 1;
385 unsigned lanes : 4; /* 3rd byte */
386 unsigned tex_dim : 2;
387 unsigned tex_array : 1;
388 unsigned tex_cube : 1;
389 unsigned tex_shadow : 1; /* 4th byte */
390 unsigned tex_live : 1;
391 unsigned tex_mask : 4;
392
393 uint8_t quadop;
394 };
395
396 static INLINE int
397 nvi_vector_size(struct nv_instruction *nvi)
398 {
399 int i;
400 assert(nvi);
401 for (i = 0; i < 5 && nvi->def[i]; ++i);
402 return i;
403 }
404
/* Control flow graph edge kinds. */
#define CFG_EDGE_FORWARD    0
#define CFG_EDGE_BACK       1
#define CFG_EDGE_LOOP_ENTER 2
#define CFG_EDGE_LOOP_LEAVE 4
#define CFG_EDGE_FAKE       8

/*
 * 'WALL' edge: one that the reachability check doesn't follow
 *              (mask 9 = BACK | FAKE).
 * 'LOOP' edge: one that has to do with loops
 *              (mask 7 = BACK | LOOP_ENTER | LOOP_LEAVE).
 */
#define IS_LOOP_EDGE(k) ((k) & 7)
#define IS_WALL_EDGE(k) ((k) & 9)
415
416 struct nv_basic_block {
417 struct nv_instruction *entry; /* first non-phi instruction */
418 struct nv_instruction *exit;
419 struct nv_instruction *phi; /* very first instruction */
420 int num_instructions;
421
422 struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
423 struct nv_basic_block *in[8]; /* hope that suffices */
424 uint num_in;
425 ubyte out_kind[2];
426 ubyte in_kind[8];
427
428 int id;
429 int subroutine;
430 uint priv; /* reset to 0 after you're done */
431 uint pass_seq;
432
433 uint32_t emit_pos; /* position, size in emitted code (in bytes) */
434 uint32_t emit_size;
435
436 uint32_t live_set[NV_PC_MAX_VALUES / 32];
437 };
438
439 struct nvc0_translation_info;
440
441 struct nv_pc {
442 struct nv_basic_block **root;
443 struct nv_basic_block *current_block;
444 struct nv_basic_block *parent_block;
445
446 int loop_nesting_bound;
447 uint pass_seq;
448
449 struct nv_value values[NV_PC_MAX_VALUES];
450 struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
451 struct nv_ref **refs;
452 struct nv_basic_block *bb_list[NV_PC_MAX_BASIC_BLOCKS];
453 int num_values;
454 int num_instructions;
455 int num_refs;
456 int num_blocks;
457 int num_subroutines;
458
459 int max_reg[4];
460
461 uint32_t *immd_buf; /* populated on emit */
462 unsigned immd_count;
463
464 uint32_t *emit;
465 uint32_t emit_size;
466 uint32_t emit_pos;
467
468 void *reloc_entries;
469 unsigned num_relocs;
470
471 /* optimization enables */
472 boolean opt_reload_elim;
473 boolean is_fragprog;
474 };
475
/*
 * Instruction list manipulation, implemented outside this header.
 * new_instruction() uses nvc0_insn_append(block, insn) and
 * new_instruction_at() uses nvc0_insn_insert_after(at, insn).
 */
void nvc0_insn_append(struct nv_basic_block *,
                      struct nv_instruction *);
void nvc0_insn_insert_before(struct nv_instruction *,
                             struct nv_instruction *);
void nvc0_insn_insert_after(struct nv_instruction *,
                            struct nv_instruction *);
479
480 static INLINE struct nv_instruction *
481 nv_alloc_instruction(struct nv_pc *pc, uint opcode)
482 {
483 struct nv_instruction *insn;
484
485 insn = &pc->instructions[pc->num_instructions++];
486 assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
487
488 insn->opcode = opcode;
489 insn->cc = 0;
490 insn->indirect = -1;
491 insn->predicate = -1;
492
493 return insn;
494 }
495
496 static INLINE struct nv_instruction *
497 new_instruction(struct nv_pc *pc, uint opcode)
498 {
499 struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
500
501 nvc0_insn_append(pc->current_block, insn);
502 return insn;
503 }
504
505 static INLINE struct nv_instruction *
506 new_instruction_at(struct nv_pc *pc, struct nv_instruction *at, uint opcode)
507 {
508 struct nv_instruction *insn = nv_alloc_instruction(pc, opcode);
509
510 nvc0_insn_insert_after(at, insn);
511 return insn;
512 }
513
514 static INLINE struct nv_value *
515 new_value(struct nv_pc *pc, ubyte file, ubyte size)
516 {
517 struct nv_value *value = &pc->values[pc->num_values];
518
519 assert(pc->num_values < NV_PC_MAX_VALUES - 1);
520
521 value->n = pc->num_values++;
522 value->join = value;
523 value->reg.id = -1;
524 value->reg.file = file;
525 value->reg.size = size;
526 return value;
527 }
528
529 static INLINE struct nv_value *
530 new_value_like(struct nv_pc *pc, struct nv_value *like)
531 {
532 return new_value(pc, like->reg.file, like->reg.size);
533 }
534
535 static INLINE struct nv_ref *
536 new_ref(struct nv_pc *pc, struct nv_value *val)
537 {
538 int i;
539 struct nv_ref *ref;
540
541 if ((pc->num_refs % 64) == 0) {
542 const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
543 const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
544
545 pc->refs = REALLOC(pc->refs, old_size, new_size);
546
547 ref = CALLOC(64, sizeof(struct nv_ref));
548 for (i = 0; i < 64; ++i)
549 pc->refs[pc->num_refs + i] = &ref[i];
550 }
551
552 ref = pc->refs[pc->num_refs++];
553 ref->value = val;
554
555 LIST_INITHEAD(&ref->list);
556
557 ++val->refc;
558 return ref;
559 }
560
561 static INLINE struct nv_basic_block *
562 new_basic_block(struct nv_pc *pc)
563 {
564 struct nv_basic_block *bb;
565
566 if (pc->num_blocks >= NV_PC_MAX_BASIC_BLOCKS)
567 return NULL;
568
569 bb = CALLOC_STRUCT(nv_basic_block);
570
571 bb->id = pc->num_blocks;
572 pc->bb_list[pc->num_blocks++] = bb;
573 return bb;
574 }
575
576 static INLINE void
577 nv_reference(struct nv_pc *pc,
578 struct nv_instruction *nvi, int c, struct nv_value *s)
579 {
580 struct nv_ref **d = &nvi->src[c];
581 assert(c < 6);
582
583 if (*d) {
584 --(*d)->value->refc;
585 LIST_DEL(&(*d)->list);
586 }
587
588 if (s) {
589 if (!*d) {
590 *d = new_ref(pc, s);
591 (*d)->insn = nvi;
592 } else {
593 LIST_DEL(&(*d)->list);
594 (*d)->value = s;
595 ++(s->refc);
596 }
597 if (!s->last_use)
598 s->last_use = *d;
599 else
600 LIST_ADDTAIL(&s->last_use->list, &(*d)->list);
601
602 s->last_use = *d;
603 (*d)->insn = nvi;
604 } else {
605 *d = NULL;
606 }
607 }
608
609 /* nvc0_emit.c */
610 void nvc0_emit_instruction(struct nv_pc *, struct nv_instruction *);
611
612 /* nvc0_print.c */
613 const char *nvc0_opcode_name(uint opcode);
614 void nvc0_print_instruction(struct nv_instruction *);
615
616 /* nvc0_pc.c */
617 void nvc0_print_function(struct nv_basic_block *root);
618 void nvc0_print_program(struct nv_pc *);
619
620 boolean nvc0_insn_can_load(struct nv_instruction *, int s,
621 struct nv_instruction *);
622 boolean nvc0_insn_is_predicateable(struct nv_instruction *);
623
624 int nvc0_insn_refcount(struct nv_instruction *);
625 void nvc0_insn_delete(struct nv_instruction *);
626 void nvc0_insns_permute(struct nv_instruction *prev, struct nv_instruction *);
627
628 void nvc0_bblock_attach(struct nv_basic_block *parent,
629 struct nv_basic_block *child, ubyte edge_kind);
630 boolean nvc0_bblock_dominated_by(struct nv_basic_block *,
631 struct nv_basic_block *);
632 boolean nvc0_bblock_reachable_by(struct nv_basic_block *future,
633 struct nv_basic_block *past,
634 struct nv_basic_block *final);
635 struct nv_basic_block *nvc0_bblock_dom_frontier(struct nv_basic_block *);
636
637 int nvc0_pc_replace_value(struct nv_pc *pc,
638 struct nv_value *old_val,
639 struct nv_value *new_val);
640
641 struct nv_value *nvc0_pc_find_immediate(struct nv_ref *);
642 struct nv_value *nvc0_pc_find_constant(struct nv_ref *);
643
/* Callback type taken by nvc0_pc_pass_in_order(): private data and a block. */
typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);

void nvc0_pc_pass_in_order(struct nv_basic_block *,
                           nv_pc_pass_func,
                           void *);

/* Compiler passes 0, 1 and 2 over a program. */
int nvc0_pc_exec_pass0(struct nv_pc *pc);
int nvc0_pc_exec_pass1(struct nv_pc *pc);
int nvc0_pc_exec_pass2(struct nv_pc *pc);

int nvc0_tgsi_to_nc(struct nv_pc *,
                    struct nvc0_translation_info *);
653
#endif /* __NVC0_COMPILER_H__ */