freedreno/ir3: add ir3 builder helpers
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3.h
1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef IR3_H_
25 #define IR3_H_
26
27 #include <stdint.h>
28 #include <stdbool.h>
29
30 #include "util/u_debug.h"
31
32 #include "instr-a3xx.h"
33 #include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
34
35 /* low level intermediate representation of an adreno shader program */
36
37 struct ir3;
38 struct ir3_instruction;
39 struct ir3_block;
40
/* Size/resource summary of a shader; ir3_assemble() takes a pointer to
 * one of these (presumably to fill it in -- confirm against ir3.c).
 */
struct ir3_info {
	uint16_t sizedwords;
	uint16_t instrs_count;   /* expanded to account for rpt's */

	/* NOTE: the max_* values below do not account for registers that
	 * the shader itself never touches (ie. a vertex attribute fetched
	 * via VFD_DECODE but not used by the shader).
	 */
	int8_t   max_reg;        /* highest GPR # used by shader */
	int8_t   max_half_reg;
	int16_t  max_const;
};
52
/* A single src or dst operand of an ir3_instruction. */
struct ir3_register {
	enum {
		IR3_REG_CONST   = 0x001,
		IR3_REG_IMMED   = 0x002,
		IR3_REG_HALF    = 0x004,
		IR3_REG_RELATIV = 0x008,
		IR3_REG_R       = 0x010,
		IR3_REG_NEGATE  = 0x020,
		IR3_REG_ABS     = 0x040,
		IR3_REG_EVEN    = 0x080,
		IR3_REG_POS_INF = 0x100,
		/* (ei) flag, end-input?  Set on the last bary, presumably to
		 * signal that the shader needs no more input:
		 */
		IR3_REG_EI      = 0x200,
		/* meta-flags, only meaningful in the intermediate stages of
		 * the IR, ie. before register assignment is done:
		 */
		IR3_REG_SSA     = 0x1000,   /* 'instr' is ptr to assigning instr */
		IR3_REG_IA      = 0x2000,   /* meta-input dst is "assigned" */
		IR3_REG_ADDR    = 0x4000,   /* register is a0.x */
	} flags;

	union {
		/* normal registers:
		 * the component is kept in the low two bits of the reg #,
		 * so rN.x becomes: (N << 2) | x
		 */
		int      num;
		/* immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
		/* relative: */
		int      offset;
	};

	/* for IR3_REG_SSA, src registers hold a pointer back to the
	 * instruction that assigns the value.
	 */
	struct ir3_instruction *instr;

	union {
		/* used for cat5 instructions, but also for internal/IR level
		 * tracking of which registers an instruction reads/writes.
		 * wrmask may be a bad name, since it describes both src and
		 * dst operands that touch multiple adjacent registers.
		 */
		unsigned wrmask;
		/* for relative addressing, 32 bits is too small to describe
		 * the array as a mask, but otoh we don't need to deal with
		 * disjoint sets, so a simple size field (number of scalar
		 * components) is used instead.
		 */
		unsigned size;
	};
};
108
109 struct ir3_instruction {
110 struct ir3_block *block;
111 int category;
112 opc_t opc;
113 enum {
114 /* (sy) flag is set on first instruction, and after sample
115 * instructions (probably just on RAW hazard).
116 */
117 IR3_INSTR_SY = 0x001,
118 /* (ss) flag is set on first instruction, and first instruction
119 * to depend on the result of "long" instructions (RAW hazard):
120 *
121 * rcp, rsq, log2, exp2, sin, cos, sqrt
122 *
123 * It seems to synchronize until all in-flight instructions are
124 * completed, for example:
125 *
126 * rsq hr1.w, hr1.w
127 * add.f hr2.z, (neg)hr2.z, hc0.y
128 * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
129 * rsq hr2.x, hr2.x
130 * (rpt1)nop
131 * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
132 * nop
133 * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
134 * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
135 * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
136 *
137 * The last mul.f does not have (ss) set, presumably because the
138 * (ss) on the previous instruction does the job.
139 *
140 * The blob driver also seems to set it on WAR hazards, although
141 * not really clear if this is needed or just blob compiler being
142 * sloppy. So far I haven't found a case where removing the (ss)
143 * causes problems for WAR hazard, but I could just be getting
144 * lucky:
145 *
146 * rcp r1.y, r3.y
147 * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
148 *
149 */
150 IR3_INSTR_SS = 0x002,
151 /* (jp) flag is set on jump targets:
152 */
153 IR3_INSTR_JP = 0x004,
154 IR3_INSTR_UL = 0x008,
155 IR3_INSTR_3D = 0x010,
156 IR3_INSTR_A = 0x020,
157 IR3_INSTR_O = 0x040,
158 IR3_INSTR_P = 0x080,
159 IR3_INSTR_S = 0x100,
160 IR3_INSTR_S2EN = 0x200,
161 /* meta-flags, for intermediate stages of IR, ie.
162 * before register assignment is done:
163 */
164 IR3_INSTR_MARK = 0x1000,
165 } flags;
166 int repeat;
167 #ifdef DEBUG
168 unsigned regs_max;
169 #endif
170 unsigned regs_count;
171 struct ir3_register **regs;
172 union {
173 struct {
174 char inv;
175 char comp;
176 int immed;
177 } cat0;
178 struct {
179 type_t src_type, dst_type;
180 } cat1;
181 struct {
182 enum {
183 IR3_COND_LT = 0,
184 IR3_COND_LE = 1,
185 IR3_COND_GT = 2,
186 IR3_COND_GE = 3,
187 IR3_COND_EQ = 4,
188 IR3_COND_NE = 5,
189 } condition;
190 } cat2;
191 struct {
192 unsigned samp, tex;
193 type_t type;
194 } cat5;
195 struct {
196 type_t type;
197 int offset;
198 int iim_val;
199 } cat6;
200 /* for meta-instructions, just used to hold extra data
201 * before instruction scheduling, etc
202 */
203 struct {
204 int off; /* component/offset */
205 } fo;
206 struct {
207 int aid;
208 } fi;
209 struct {
210 struct ir3_block *if_block, *else_block;
211 } flow;
212 struct {
213 struct ir3_block *block;
214 } inout;
215
216 /* XXX keep this as big as all other union members! */
217 uint32_t info[3];
218 };
219
220 /* transient values used during various algorithms: */
221 union {
222 /* The instruction depth is the max dependency distance to output.
223 *
224 * You can also think of it as the "cost", if we did any sort of
225 * optimization for register footprint. Ie. a value that is just
226 * result of moving a const to a reg would have a low cost, so to
227 * it could make sense to duplicate the instruction at various
228 * points where the result is needed to reduce register footprint.
229 *
230 * DEPTH_UNUSED used to mark unused instructions after depth
231 * calculation pass.
232 */
233 #define DEPTH_UNUSED ~0
234 unsigned depth;
235 };
236
237 /* Used during CP and RA stages. For fanin and shader inputs/
238 * outputs where we need a sequence of consecutive registers,
239 * keep track of each src instructions left (ie 'n-1') and right
240 * (ie 'n+1') neighbor. The front-end must insert enough mov's
241 * to ensure that each instruction has at most one left and at
242 * most one right neighbor. During the copy-propagation pass,
243 * we only remove mov's when we can preserve this constraint.
244 * And during the RA stage, we use the neighbor information to
245 * allocate a block of registers in one shot.
246 *
247 * TODO: maybe just add something like:
248 * struct ir3_instruction_ref {
249 * struct ir3_instruction *instr;
250 * unsigned cnt;
251 * }
252 *
253 * Or can we get away without the refcnt stuff? It seems like
254 * it should be overkill.. the problem is if, potentially after
255 * already eliminating some mov's, if you have a single mov that
256 * needs to be grouped with it's neighbors in two different
257 * places (ex. shader output and a fanin).
258 */
259 struct {
260 struct ir3_instruction *left, *right;
261 uint16_t left_cnt, right_cnt;
262 } cp;
263
264 /* an instruction can reference at most one address register amongst
265 * it's src/dst registers. Beyond that, you need to insert mov's.
266 */
267 struct ir3_instruction *address;
268
269 /* in case of a instruction with relative dst instruction, we need to
270 * capture the dependency on the fanin for the previous values of
271 * the array elements. Since we don't know at compile time actually
272 * which array elements are written, this serves to preserve the
273 * unconditional write to array elements prior to the conditional
274 * write.
275 *
276 * TODO only cat1 can do indirect write.. we could maybe move this
277 * into instr->cat1.fanin (but would require the frontend to insert
278 * the extra mov)
279 */
280 struct ir3_instruction *fanin;
281
282 struct ir3_instruction *next;
283 #ifdef DEBUG
284 uint32_t serialno;
285 #endif
286 };
287
288 static inline struct ir3_instruction *
289 ir3_neighbor_first(struct ir3_instruction *instr)
290 {
291 while (instr->cp.left)
292 instr = instr->cp.left;
293 return instr;
294 }
295
296 static inline int ir3_neighbor_count(struct ir3_instruction *instr)
297 {
298 int num = 1;
299
300 debug_assert(!instr->cp.left);
301
302 while (instr->cp.right) {
303 num++;
304 instr = instr->cp.right;
305 }
306
307 return num;
308 }
309
310 struct ir3_heap_chunk;
311
/* Top level shader object.  Each "<name>_count / <name>_sz / <name>"
 * triple below has the shape expected by the array_insert() macro later
 * in this header.
 */
struct ir3 {
	unsigned instrs_count, instrs_sz;
	struct ir3_instruction **instrs;

	/* Track bary.f (and ldlv) instructions..  the scheduler needs
	 * this to make sure all varying fetches happen before any
	 * potential kill instruction.  The hw gets grumpy if all
	 * threads in a group are killed before the last bary.f gets
	 * a chance to signal end of input (ei).
	 */
	unsigned baryfs_count, baryfs_sz;
	struct ir3_instruction **baryfs;

	/* Track all indirect instructions (read and write).  To avoid
	 * a deadlock scenario where an address register gets scheduled,
	 * but other dependent src instructions cannot be scheduled due
	 * to a dependency on a *different* address register value, the
	 * scheduler needs to ensure that all of an instruction's
	 * dependencies other than the address register are scheduled
	 * before the one that writes the address register.  Having a
	 * convenient list of the instructions that reference some
	 * address register simplifies this.
	 */
	unsigned indirects_count, indirects_sz;
	struct ir3_instruction **indirects;

	struct ir3_block *block;
	unsigned heap_idx;
	struct ir3_heap_chunk *chunk;
};
342
/* A block of instructions belonging to a shader. */
struct ir3_block {
	struct ir3 *shader;

	unsigned ntemporaries, ninputs, noutputs;
	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
	struct ir3_instruction **temporaries;
	struct ir3_instruction **inputs;
	struct ir3_instruction **outputs;

	/* only a single address register: */
	struct ir3_instruction *address;

	struct ir3_block *parent;
	struct ir3_instruction *head;
};
355
356 struct ir3 * ir3_create(void);
357 void ir3_destroy(struct ir3 *shader);
358 void * ir3_assemble(struct ir3 *shader,
359 struct ir3_info *info, uint32_t gpu_id);
360 void * ir3_alloc(struct ir3 *shader, int sz);
361
362 struct ir3_block * ir3_block_create(struct ir3 *shader,
363 unsigned ntmp, unsigned nin, unsigned nout);
364
365 struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
366 int category, opc_t opc);
367 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
368 int category, opc_t opc, int nreg);
369 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
370 const char *ir3_instr_name(struct ir3_instruction *instr);
371
372 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
373 int num, int flags);
374
375
376 static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
377 {
378 if (instr->flags & IR3_INSTR_MARK)
379 return true; /* already visited */
380 instr->flags |= IR3_INSTR_MARK;
381 return false;
382 }
383
384 static inline void ir3_clear_mark(struct ir3 *shader)
385 {
386 /* TODO would be nice to drop the instruction array.. for
387 * new compiler, _clear_mark() is all we use it for, and
388 * we could probably manage a linked list instead..
389 *
390 * Also, we'll probably want to mark instructions within
391 * a block, so tracking the list of instrs globally is
392 * unlikely to be what we want.
393 */
394 unsigned i;
395 for (i = 0; i < shader->instrs_count; i++) {
396 struct ir3_instruction *instr = shader->instrs[i];
397 instr->flags &= ~IR3_INSTR_MARK;
398 }
399 }
400
401 static inline int ir3_instr_regno(struct ir3_instruction *instr,
402 struct ir3_register *reg)
403 {
404 unsigned i;
405 for (i = 0; i < instr->regs_count; i++)
406 if (reg == instr->regs[i])
407 return i;
408 return -1;
409 }
410
411
412 #define MAX_ARRAYS 16
413
/* Pack a register number and a component into a single register id.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * The component occupies the low two bits (only the low two bits of
 * 'comp' are used), so rN.x becomes (N << 2) | x.
 *
 * The shift is performed on an unsigned value: left-shifting a negative
 * (or too large) signed int is undefined behaviour, whereas the unsigned
 * form simply wraps.  Results are unchanged for every non-negative 'num'
 * that fits without overflow.
 */
static inline uint32_t regid(int num, int comp)
{
	return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3);
}
424
425 static inline uint32_t reg_num(struct ir3_register *reg)
426 {
427 return reg->num >> 2;
428 }
429
430 static inline uint32_t reg_comp(struct ir3_register *reg)
431 {
432 return reg->num & 0x3;
433 }
434
435 static inline bool is_flow(struct ir3_instruction *instr)
436 {
437 return (instr->category == 0);
438 }
439
440 static inline bool is_kill(struct ir3_instruction *instr)
441 {
442 return is_flow(instr) && (instr->opc == OPC_KILL);
443 }
444
445 static inline bool is_nop(struct ir3_instruction *instr)
446 {
447 return is_flow(instr) && (instr->opc == OPC_NOP);
448 }
449
450 static inline bool is_alu(struct ir3_instruction *instr)
451 {
452 return (1 <= instr->category) && (instr->category <= 3);
453 }
454
455 static inline bool is_sfu(struct ir3_instruction *instr)
456 {
457 return (instr->category == 4);
458 }
459
460 static inline bool is_tex(struct ir3_instruction *instr)
461 {
462 return (instr->category == 5);
463 }
464
465 static inline bool is_mem(struct ir3_instruction *instr)
466 {
467 return (instr->category == 6);
468 }
469
470 static inline bool is_input(struct ir3_instruction *instr)
471 {
472 /* in some cases, ldlv is used to fetch varying without
473 * interpolation.. fortunately inloc is the first src
474 * register in either case
475 */
476 if (is_mem(instr) && (instr->opc == OPC_LDLV))
477 return true;
478 return (instr->category == 2) && (instr->opc == OPC_BARY_F);
479 }
480
481 static inline bool is_meta(struct ir3_instruction *instr)
482 {
483 /* TODO how should we count PHI (and maybe fan-in/out) which
484 * might actually contribute some instructions to the final
485 * result?
486 */
487 return (instr->category == -1);
488 }
489
490 static inline bool writes_addr(struct ir3_instruction *instr)
491 {
492 if (instr->regs_count > 0) {
493 struct ir3_register *dst = instr->regs[0];
494 return !!(dst->flags & IR3_REG_ADDR);
495 }
496 return false;
497 }
498
499 static inline bool writes_pred(struct ir3_instruction *instr)
500 {
501 if (instr->regs_count > 0) {
502 struct ir3_register *dst = instr->regs[0];
503 return reg_num(dst) == REG_P0;
504 }
505 return false;
506 }
507
508 /* returns defining instruction for reg */
509 /* TODO better name */
510 static inline struct ir3_instruction *ssa(struct ir3_register *reg)
511 {
512 if (reg->flags & IR3_REG_SSA)
513 return reg->instr;
514 return NULL;
515 }
516
517 static inline bool reg_gpr(struct ir3_register *r)
518 {
519 if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
520 return false;
521 if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
522 return false;
523 return true;
524 }
525
/* Append 'val' to a growable array.  'arr' names the array lvalue, and
 * the companion lvalues 'arr_count' (elements in use) and 'arr_sz'
 * (allocated elements) must exist next to it.  When the array is full
 * the capacity becomes the larger of twice the old capacity and 16.
 *
 * Note that 'arr' is expanded several times, and that the realloc()
 * result is stored straight back into 'arr' without a failure check.
 */
#define array_insert(arr, val) do { \
		if (arr ## _sz == arr ## _count) { \
			arr ## _sz = MAX2(16, 2 * arr ## _sz); \
			arr = realloc(arr, sizeof(arr[0]) * arr ## _sz); \
		} \
		arr[arr ## _count++] = val; \
	} while (0)
533
/* Iterator over an instruction's source registers, ie. regs[1] up to
 * regs[regs_count - 1] (regs[0] is skipped).  '__srcreg' receives each
 * non-NULL register and '__n' is declared by the macro as the zero-based
 * source index.
 *
 * The expansion is an if/for/if chain without braces, so an 'else'
 * written directly after the loop body binds to the macro's inner 'if'.
 */
#define foreach_src_n(__srcreg, __n, __instr) \
	if ((__instr)->regs_count != 0) \
		for (unsigned __n = 0, __cnt = (__instr)->regs_count - 1; \
				__n < __cnt; __n++) \
			if (((__srcreg) = (__instr)->regs[__n + 1]))

/* Same as foreach_src_n(), for callers that do not need the source
 * index (it is still declared, under the name __i):
 */
#define foreach_src(__srcreg, __instr) \
	foreach_src_n(__srcreg, __i, __instr)
543
544 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
545 {
546 if (instr->fanin)
547 return instr->regs_count + 2;
548 if (instr->address)
549 return instr->regs_count + 1;
550 return instr->regs_count;
551 }
552
553 static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
554 {
555 if (n == (instr->regs_count + 1))
556 return instr->fanin;
557 if (n == (instr->regs_count + 0))
558 return instr->address;
559 return ssa(instr->regs[n]);
560 }
561
/* regs_count when the instruction has an address, regs_count - 1
 * otherwise.  '__instr' is evaluated twice.
 */
#define __src_cnt(__instr) \
	((__instr)->regs_count - ((__instr)->address ? 0 : 1))
563
/* Iterator over an instruction's SSA sources, ie. the instructions that
 * produce its src registers, followed by the address and fanin
 * instructions when present.  '__srcinst' receives each non-NULL source
 * instruction and '__n' is declared by the macro as the zero-based
 * source index.
 *
 * The expansion is an if/for/if chain without braces, so an 'else'
 * written directly after the loop body binds to the macro's inner 'if'.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
	if ((__instr)->regs_count != 0) \
		for (unsigned __n = 0, __cnt = __ssa_src_cnt(__instr) - 1; \
				__n < __cnt; __n++) \
			if (((__srcinst) = __ssa_src_n(__instr, __n + 1)))

/* Same as foreach_ssa_src_n(), for callers that do not need the source
 * index (it is still declared, under the name __i):
 */
#define foreach_ssa_src(__srcinst, __instr) \
	foreach_ssa_src_n(__srcinst, __i, __instr)
573
574
575 /* dump: */
576 #include <stdio.h>
577 void ir3_dump(struct ir3 *shader, const char *name,
578 struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
579 FILE *f);
580 void ir3_dump_instr_single(struct ir3_instruction *instr);
581 void ir3_dump_instr_list(struct ir3_instruction *instr);
582
583 /* flatten if/else: */
584 int ir3_block_flatten(struct ir3_block *block);
585
586 /* depth calculation: */
587 int ir3_delayslots(struct ir3_instruction *assigner,
588 struct ir3_instruction *consumer, unsigned n);
589 void ir3_block_depth(struct ir3_block *block);
590
591 /* copy-propagate: */
592 void ir3_block_cp(struct ir3_block *block);
593
594 /* group neighbors and insert mov's to resolve conflicts: */
595 void ir3_block_group(struct ir3_block *block);
596
597 /* scheduling: */
598 int ir3_block_sched(struct ir3_block *block);
599
600 /* register assignment: */
601 int ir3_block_ra(struct ir3_block *block, enum shader_t type,
602 bool frag_coord, bool frag_face);
603
604 /* legalize: */
605 void ir3_block_legalize(struct ir3_block *block,
606 bool *has_samp, int *max_bary);
607
608 /* ************************************************************************* */
609 /* instruction helpers */
610
611 static inline struct ir3_instruction *
612 ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
613 {
614 struct ir3_instruction *instr =
615 ir3_instr_create(block, 1, 0);
616 ir3_reg_create(instr, 0, 0); /* dst */
617 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
618 instr->cat1.src_type = type;
619 instr->cat1.dst_type = type;
620 return instr;
621 }
622
623 static inline struct ir3_instruction *
624 ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
625 type_t src_type, type_t dst_type)
626 {
627 struct ir3_instruction *instr =
628 ir3_instr_create(block, 1, 0);
629 ir3_reg_create(instr, 0, 0); /* dst */
630 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
631 instr->cat1.src_type = src_type;
632 instr->cat1.dst_type = dst_type;
633 return instr;
634 }
635
/* Define ir3_<name>(): builds a category CAT instruction with opcode
 * OPC_<name>, one dst register and one SSA src register.  'aflags' is
 * OR'd into the src register flags.
 */
#define INSTR1(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	struct ir3_register *areg; \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	areg = ir3_reg_create(instr, 0, IR3_REG_SSA | aflags); \
	areg->instr = a; \
	return instr; \
}
647
/* Define ir3_<name>(): builds a category CAT instruction with opcode
 * OPC_<name>, one dst register and two SSA src registers.  'aflags' and
 * 'bflags' are OR'd into the respective src register flags.
 */
#define INSTR2(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	struct ir3_register *areg, *breg; \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	areg = ir3_reg_create(instr, 0, IR3_REG_SSA | aflags); \
	areg->instr = a; \
	breg = ir3_reg_create(instr, 0, IR3_REG_SSA | bflags); \
	breg->instr = b; \
	return instr; \
}
661
/* Define ir3_<name>(): builds a category CAT instruction with opcode
 * OPC_<name>, one dst register and three SSA src registers.  'aflags',
 * 'bflags' and 'cflags' are OR'd into the respective src register flags.
 */
#define INSTR3(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags, \
		struct ir3_instruction *c, unsigned cflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	struct ir3_register *areg, *breg, *creg; \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	areg = ir3_reg_create(instr, 0, IR3_REG_SSA | aflags); \
	areg->instr = a; \
	breg = ir3_reg_create(instr, 0, IR3_REG_SSA | bflags); \
	breg->instr = b; \
	creg = ir3_reg_create(instr, 0, IR3_REG_SSA | cflags); \
	creg->instr = c; \
	return instr; \
}
677
678 /* cat0 instructions: */
679 INSTR1(0, KILL);
680
681 /* cat2 instructions, most 2 src but some 1 src: */
682 INSTR2(2, ADD_F)
683 INSTR2(2, MIN_F)
684 INSTR2(2, MAX_F)
685 INSTR2(2, MUL_F)
686 INSTR1(2, SIGN_F)
687 INSTR2(2, CMPS_F)
688 INSTR1(2, ABSNEG_F)
689 INSTR2(2, CMPV_F)
690 INSTR1(2, FLOOR_F)
691 INSTR1(2, CEIL_F)
692 INSTR1(2, RNDNE_F)
693 INSTR1(2, RNDAZ_F)
694 INSTR1(2, TRUNC_F)
695 INSTR2(2, ADD_U)
696 INSTR2(2, ADD_S)
697 INSTR2(2, SUB_U)
698 INSTR2(2, SUB_S)
699 INSTR2(2, CMPS_U)
700 INSTR2(2, CMPS_S)
701 INSTR2(2, MIN_U)
702 INSTR2(2, MIN_S)
703 INSTR2(2, MAX_U)
704 INSTR2(2, MAX_S)
705 INSTR1(2, ABSNEG_S)
706 INSTR2(2, AND_B)
707 INSTR2(2, OR_B)
708 INSTR1(2, NOT_B)
709 INSTR2(2, XOR_B)
710 INSTR2(2, CMPV_U)
711 INSTR2(2, CMPV_S)
712 INSTR2(2, MUL_U)
713 INSTR2(2, MUL_S)
714 INSTR2(2, MULL_U)
715 INSTR1(2, BFREV_B)
716 INSTR1(2, CLZ_S)
717 INSTR1(2, CLZ_B)
718 INSTR2(2, SHL_B)
719 INSTR2(2, SHR_B)
720 INSTR2(2, ASHR_B)
721 INSTR2(2, BARY_F)
722 INSTR2(2, MGEN_B)
723 INSTR2(2, GETBIT_B)
724 INSTR1(2, SETRM)
725 INSTR1(2, CBITS_B)
726 INSTR2(2, SHB)
727 INSTR2(2, MSAD)
728
729 /* cat3 instructions: */
730 INSTR3(3, MAD_U16)
731 INSTR3(3, MADSH_U16)
732 INSTR3(3, MAD_S16)
733 INSTR3(3, MADSH_M16)
734 INSTR3(3, MAD_U24)
735 INSTR3(3, MAD_S24)
736 INSTR3(3, MAD_F16)
737 INSTR3(3, MAD_F32)
738 INSTR3(3, SEL_B16)
739 INSTR3(3, SEL_B32)
740 INSTR3(3, SEL_S16)
741 INSTR3(3, SEL_S32)
742 INSTR3(3, SEL_F16)
743 INSTR3(3, SEL_F32)
744 INSTR3(3, SAD_S16)
745 INSTR3(3, SAD_S32)
746
747 /* cat4 instructions: */
748 INSTR1(4, RCP)
749 INSTR1(4, RSQ)
750 INSTR1(4, LOG2)
751 INSTR1(4, EXP2)
752 INSTR1(4, SIN)
753 INSTR1(4, COS)
754 INSTR1(4, SQRT)
755
756 /* cat5 instructions: */
757 INSTR1(5, DSX)
758 INSTR1(5, DSY)
759
760 /* cat6 instructions: */
761 INSTR2(6, LDLV)
762
763 /* ************************************************************************* */
764 /* split this out or find some helper to use.. like main/bitset.h.. */
765
766 #include <string.h>
767
768 #define MAX_REG 256
769
770 typedef uint8_t regmask_t[2 * MAX_REG / 8];
771
772 static inline unsigned regmask_idx(struct ir3_register *reg)
773 {
774 unsigned num = reg->num;
775 debug_assert(num < MAX_REG);
776 if (reg->flags & IR3_REG_HALF)
777 num += MAX_REG;
778 return num;
779 }
780
781 static inline void regmask_init(regmask_t *regmask)
782 {
783 memset(regmask, 0, sizeof(*regmask));
784 }
785
786 static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
787 {
788 unsigned idx = regmask_idx(reg);
789 if (reg->flags & IR3_REG_RELATIV) {
790 unsigned i;
791 for (i = 0; i < reg->size; i++, idx++)
792 (*regmask)[idx / 8] |= 1 << (idx % 8);
793 } else {
794 unsigned mask;
795 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
796 if (mask & 1)
797 (*regmask)[idx / 8] |= 1 << (idx % 8);
798 }
799 }
800
801 static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
802 {
803 unsigned i;
804 for (i = 0; i < ARRAY_SIZE(*dst); i++)
805 (*dst)[i] = (*a)[i] | (*b)[i];
806 }
807
808 /* set bits in a if not set in b, conceptually:
809 * a |= (reg & ~b)
810 */
811 static inline void regmask_set_if_not(regmask_t *a,
812 struct ir3_register *reg, regmask_t *b)
813 {
814 unsigned idx = regmask_idx(reg);
815 if (reg->flags & IR3_REG_RELATIV) {
816 unsigned i;
817 for (i = 0; i < reg->size; i++, idx++)
818 if (!((*b)[idx / 8] & (1 << (idx % 8))))
819 (*a)[idx / 8] |= 1 << (idx % 8);
820 } else {
821 unsigned mask;
822 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
823 if (mask & 1)
824 if (!((*b)[idx / 8] & (1 << (idx % 8))))
825 (*a)[idx / 8] |= 1 << (idx % 8);
826 }
827 }
828
829 static inline bool regmask_get(regmask_t *regmask,
830 struct ir3_register *reg)
831 {
832 unsigned idx = regmask_idx(reg);
833 if (reg->flags & IR3_REG_RELATIV) {
834 unsigned i;
835 for (i = 0; i < reg->size; i++, idx++)
836 if ((*regmask)[idx / 8] & (1 << (idx % 8)))
837 return true;
838 } else {
839 unsigned mask;
840 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
841 if (mask & 1)
842 if ((*regmask)[idx / 8] & (1 << (idx % 8)))
843 return true;
844 }
845 return false;
846 }
847
848 /* ************************************************************************* */
849
850 #endif /* IR3_H_ */