radeonsi: use current context for DCC feedback-loop decompress, fixes Elemental
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc.h
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #ifndef SB_BC_H_
28 #define SB_BC_H_
29
30 #include <stdint.h>
31 #include "r600_isa.h"
32
33 #include <cstdio>
34 #include <string>
35 #include <vector>
36 #include <stack>
37
38 struct r600_bytecode;
39 struct r600_shader;
40
41 namespace r600_sb {
42
43 class hw_encoding_format;
44 class node;
45 class alu_node;
46 class cf_node;
47 class fetch_node;
48 class alu_group_node;
49 class region_node;
50 class shader;
51 class value;
52
/**
 * Minimal abstract text sink used by the dump/debug code.
 *
 * Derived classes implement write(); every operator<< and print_* helper
 * formats its argument into a local buffer and forwards the resulting
 * NUL-terminated text to write().
 */
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base class: a virtual destructor keeps destruction through
	// an sb_ostream pointer well-defined.
	virtual ~sb_ostream() {}

	// Emit the NUL-terminated string s.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers pass the field width through a '*' argument
	// instead of building a format string in a tiny buffer, so large widths
	// can no longer overflow anything. Output longer than the 256-byte
	// buffer is truncated by snprintf.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// %x expects an unsigned argument.
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139
140 class sb_ostringstream : public sb_ostream {
141 std::string data;
142 public:
143 sb_ostringstream() : data() {}
144
145 virtual void write(const char *s) {
146 data += s;
147 }
148
149 void clear() { data.clear(); }
150
151 const char* c_str() { return data.c_str(); }
152 std::string& str() { return data; }
153 };
154
155 class sb_log : public sb_ostream {
156 FILE *o;
157 public:
158 sb_log() : o(stderr) {}
159
160 virtual void write(const char *s) {
161 fputs(s, o);
162 }
163 };
164
165 extern sb_log sblog;
166
// Shader target identifiers. TARGET_NUM must stay last: it evaluates to the
// number of targets listed before it.
enum shader_target
{
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	TARGET_NUM
};
182
// One bit per hardware class plus named unions of those bits.
// sb_context::hw_class_bit() maps an sb_hw_class value to its single bit.
enum sb_hw_class_bits
{
	HB_R6 = 0x1,
	HB_R7 = 0x2,
	HB_EG = 0x4,
	HB_CM = 0x8,

	HB_R6R7   = HB_R6 | HB_R7,
	HB_EGCM   = HB_EG | HB_CM,
	HB_R6R7EG = HB_R6 | HB_R7 | HB_EG,
	HB_R7EGCM = HB_R7 | HB_EG | HB_CM,

	HB_ALL = HB_R6 | HB_R7 | HB_EG | HB_CM
};
197
// Chip identifiers; values are consecutive starting at 0.
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};
227
// Hardware class identifiers. The numeric order matters:
// sb_context::is_egcm() tests hw_class >= HW_CLASS_EVERGREEN.
enum sb_hw_class
{
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};
236
// ALU slot indices: X, Y, Z, W and TRANS, numbered consecutively from 0.
enum alu_slots {
	SLOT_X = 0,
	SLOT_Y,
	SLOT_Z,
	SLOT_W,
	SLOT_TRANS
};
244
// Miscellaneous numeric limits used by the bytecode code.
enum misc_consts {
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS    = 128,
	MAX_GPR          = 128,
	MAX_CHAN         = 4
};
252
// Named ALU source selector values. The numbering has gaps, so every value
// is spelled out explicitly.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A            = 219,
	ALU_SRC_LDS_OQ_B            = 220,
	ALU_SRC_LDS_OQ_A_POP        = 221,
	ALU_SRC_LDS_OQ_B_POP        = 222,
	ALU_SRC_LDS_DIRECT_A        = 223,
	ALU_SRC_LDS_DIRECT_B        = 224,
	ALU_SRC_TIME_HI             = 227,
	ALU_SRC_TIME_LO             = 228,
	ALU_SRC_MASK_HI             = 229,
	ALU_SRC_MASK_LO             = 230,
	ALU_SRC_HW_WAVE_ID          = 231,
	ALU_SRC_SIMD_ID             = 232,
	ALU_SRC_SE_ID               = 233,
	ALU_SRC_HW_THREADGRP_ID     = 234,
	ALU_SRC_WAVE_ID_IN_GRP      = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD          = 237,
	ALU_SRC_LOOP_IDX            = 238,
	ALU_SRC_PARAM_BASE_ADDR     = 240,
	ALU_SRC_NEW_PRIM_MASK       = 241,
	ALU_SRC_PRIM_MASK_HI        = 242,
	ALU_SRC_PRIM_MASK_LO        = 243,
	ALU_SRC_1_DBL_L             = 244,
	ALU_SRC_1_DBL_M             = 245,
	ALU_SRC_0_5_DBL_L           = 246,
	ALU_SRC_0_5_DBL_M           = 247,
	ALU_SRC_0                   = 248,
	ALU_SRC_1                   = 249,
	ALU_SRC_1_INT               = 250,
	ALU_SRC_M_1_INT             = 251,
	ALU_SRC_0_5                 = 252,
	ALU_SRC_LITERAL             = 253,
	ALU_SRC_PV                  = 254,
	ALU_SRC_PS                  = 255,

	ALU_SRC_PARAM_OFFSET        = 448
};
292
// Predicate select values; value 1 is reserved and has no enumerator.
enum alu_predicate_select
{
	PRED_SEL_OFF = 0,
	/* 1 is reserved */
	PRED_SEL_0   = 2,
	PRED_SEL_1   = 3
};
300
301
// Output modifier values, numbered consecutively from 0.
enum alu_omod {
	OMOD_OFF = 0,
	OMOD_M2,
	OMOD_M4,
	OMOD_D2
};
308
// ALU index mode values. Values 1-3 carry an _R600 suffix in their names.
enum alu_index_mode {
	INDEX_AR_X        = 0,
	INDEX_AR_Y_R600   = 1,
	INDEX_AR_Z_R600   = 2,
	INDEX_AR_W_R600   = 3,

	INDEX_LOOP        = 4,
	INDEX_GLOBAL      = 5,
	INDEX_GLOBAL_AR_X = 6
};
319
// MOVA destination selectors (Cayman, per the enum name), numbered from 0.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,	// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};
330
// Exec-mask operation values (Cayman, per the enum name), numbered from 0.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};
337
338
// Export types. EXP_TYPE_COUNT must stay last: it evaluates to the number
// of export types listed before it.
enum cf_exp_type {
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	EXP_TYPE_COUNT
};
346
// Memory write types, numbered consecutively from 0.
enum cf_mem_type {
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};
353
354
// Kcache lock modes; bc_cf::is_alu_extended() compares bc_kcache::mode
// against KC_LOCK_NONE.
enum alu_kcache_mode {
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};
361
// Kcache index modes; bc_cf::is_alu_extended() compares
// bc_kcache::index_mode against KC_INDEX_NONE.
enum alu_kcache_index_mode {
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};
368
// Channel select values; value 6 is reserved and has no enumerator.
enum chan_select {
	SEL_X    = 0,
	SEL_Y    = 1,
	SEL_Z    = 2,
	SEL_W    = 3,
	SEL_0    = 4,
	SEL_1    = 5,
	/* 6 is reserved */
	SEL_MASK = 7
};
379
// Bank swizzle values. The VEC_* and SCL_* groups are two separate
// numberings that share one enum, so their numeric values overlap.
// VEC_NUM and SCL_NUM give the size of each group.
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021 = 1,
	VEC_120 = 2,
	VEC_102 = 3,
	VEC_201 = 4,
	VEC_210 = 5,

	VEC_NUM = 6,

	SCL_210 = 0,
	SCL_122 = 1,
	SCL_212 = 2,
	SCL_221 = 3,

	SCL_NUM = 4
};
398
// Scheduler queue identifiers. SQ_NUM must stay last: it evaluates to the
// number of queues listed before it.
enum sched_queue_id {
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,

	SQ_NUM
};
407
/**
 * 32-bit literal value that can be viewed as a signed integer, an unsigned
 * integer or a float. The three views share storage through an anonymous
 * union.
 *
 * The constructors are intentionally implicit so plain numeric values
 * convert to literal.
 */
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	literal(int32_t i = 0) : i(i) {}
	literal(uint32_t u) : u(u) {}
	literal(float f) : f(f) {}
	// A double is narrowed to single precision; the cast makes that explicit.
	literal(double f) : f(static_cast<float>(f)) {}

	// Raw 32-bit pattern.
	operator uint32_t() const { return u; }

	// Comparisons never modify the object, so they are const-qualified and
	// can be used on const literals.
	bool operator ==(literal l) const { return u == l.u; }
	bool operator ==(int v_int) const { return i == v_int; }
	bool operator ==(unsigned v_uns) const { return u == v_uns; }
};
424
// One kcache entry of a CF instruction (bc_cf holds four of these).
struct bc_kcache {
	unsigned mode;		// compared against KC_LOCK_NONE by bc_cf::is_alu_extended()
	unsigned bank;
	unsigned addr;
	unsigned index_mode;	// compared against KC_INDEX_NONE by bc_cf::is_alu_extended()
};
431
432 // TODO optimize bc structures
433
434 struct bc_cf {
435
436 bc_kcache kc[4];
437
438 unsigned id;
439
440
441 const cf_op_info * op_ptr;
442 unsigned op;
443
444 unsigned addr:32;
445
446 unsigned alt_const:1;
447 unsigned uses_waterfall:1;
448
449 unsigned barrier:1;
450 unsigned count:7;
451 unsigned pop_count:3;
452 unsigned call_count:6;
453 unsigned whole_quad_mode:1;
454 unsigned valid_pixel_mode:1;
455
456 unsigned jumptable_sel:3;
457 unsigned cf_const:5;
458 unsigned cond:2;
459 unsigned end_of_program:1;
460
461 unsigned array_base:13;
462 unsigned elem_size:2;
463 unsigned index_gpr:7;
464 unsigned rw_gpr:7;
465 unsigned rw_rel:1;
466 unsigned type:2;
467
468 unsigned burst_count:4;
469 unsigned mark:1;
470 unsigned sel[4];
471
472 unsigned array_size:12;
473 unsigned comp_mask:4;
474
475 unsigned rat_id:4;
476 unsigned rat_inst:6;
477 unsigned rat_index_mode:2;
478
479 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
480
481 bool is_alu_extended() {
482 assert(op_ptr->flags & CF_ALU);
483 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
484 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
485 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
486 }
487
488 };
489
490 struct bc_alu_src {
491 unsigned sel:9;
492 unsigned chan:2;
493 unsigned neg:1;
494 unsigned abs:1;
495 unsigned rel:1;
496 literal value;
497 };
498
499 struct bc_alu {
500 const alu_op_info * op_ptr;
501 unsigned op;
502
503 bc_alu_src src[3];
504
505 unsigned dst_gpr:7;
506 unsigned dst_chan:2;
507 unsigned dst_rel:1;
508 unsigned clamp:1;
509 unsigned omod:2;
510 unsigned bank_swizzle:3;
511
512 unsigned index_mode:3;
513 unsigned last:1;
514 unsigned pred_sel:2;
515
516 unsigned fog_merge:1;
517 unsigned write_mask:1;
518 unsigned update_exec_mask:1;
519 unsigned update_pred:1;
520
521 unsigned slot:3;
522
523 unsigned lds_idx_offset:6;
524
525 alu_op_flags slot_flags;
526
527 void set_op(unsigned op) {
528 this->op = op;
529 op_ptr = r600_isa_alu(op);
530 }
531 };
532
533 struct bc_fetch {
534 const fetch_op_info * op_ptr;
535 unsigned op;
536
537 unsigned bc_frac_mode:1;
538 unsigned fetch_whole_quad:1;
539 unsigned resource_id:8;
540
541 unsigned src_gpr:7;
542 unsigned src_rel:1;
543 unsigned src_rel_global:1; /* for GDS ops */
544 unsigned src_sel[4];
545
546 unsigned dst_gpr:7;
547 unsigned dst_rel:1;
548 unsigned dst_rel_global:1; /* for GDS ops */
549 unsigned dst_sel[4];
550
551 unsigned alt_const:1;
552
553 unsigned inst_mod:2;
554 unsigned resource_index_mode:2;
555 unsigned sampler_index_mode:2;
556
557 unsigned coord_type[4];
558 unsigned lod_bias:7;
559
560 unsigned offset[3];
561
562 unsigned sampler_id:5;
563
564
565 unsigned fetch_type:2;
566 unsigned mega_fetch_count:6;
567 unsigned coalesced_read:1;
568 unsigned structured_read:2;
569 unsigned lds_req:1;
570
571 unsigned data_format:6;
572 unsigned format_comp_all:1;
573 unsigned num_format_all:2;
574 unsigned semantic_id:8;
575 unsigned srf_mode_all:1;
576 unsigned use_const_fields:1;
577
578 unsigned const_buf_no_stride:1;
579 unsigned endian_swap:2;
580 unsigned mega_fetch:1;
581
582 unsigned src2_gpr:7; /* for GDS */
583 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
584 };
585
586 struct shader_stats {
587 unsigned ndw;
588 unsigned ngpr;
589 unsigned nstack;
590
591 unsigned cf; // clause instructions not included
592 unsigned alu;
593 unsigned alu_clauses;
594 unsigned fetch_clauses;
595 unsigned fetch;
596 unsigned alu_groups;
597
598 unsigned shaders; // number of shaders (for accumulated stats)
599
600 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
601 fetch_clauses(), fetch(), alu_groups(), shaders() {}
602
603 void collect(node *n);
604 void accumulate(shader_stats &s);
605 void dump();
606 void dump_diff(shader_stats &s);
607 };
608
609 class sb_context {
610
611 public:
612
613 shader_stats src_stats, opt_stats;
614
615 r600_isa *isa;
616
617 sb_hw_chip hw_chip;
618 sb_hw_class hw_class;
619
620 unsigned alu_temp_gprs;
621 unsigned max_fetch;
622 bool has_trans;
623 unsigned vtx_src_num;
624 unsigned num_slots;
625 bool uses_mova_gpr;
626
627 bool r6xx_gpr_index_workaround;
628
629 bool stack_workaround_8xx;
630 bool stack_workaround_9xx;
631
632 unsigned wavefront_size;
633 unsigned stack_entry_size;
634
635 static unsigned dump_pass;
636 static unsigned dump_stat;
637
638 static unsigned dry_run;
639 static unsigned no_fallback;
640 static unsigned safe_math;
641
642 static unsigned dskip_start;
643 static unsigned dskip_end;
644 static unsigned dskip_mode;
645
646 sb_context() : src_stats(), opt_stats(), isa(0),
647 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
648
649 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
650
651 bool is_r600() {return hw_class == HW_CLASS_R600;}
652 bool is_r700() {return hw_class == HW_CLASS_R700;}
653 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
654 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
655 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
656
657 bool needs_8xx_stack_workaround() {
658 if (!is_evergreen())
659 return false;
660
661 switch (hw_chip) {
662 case HW_CHIP_CYPRESS:
663 case HW_CHIP_JUNIPER:
664 return false;
665 default:
666 return true;
667 }
668 }
669
670 bool needs_9xx_stack_workaround() {
671 return is_cayman();
672 }
673
674 sb_hw_class_bits hw_class_bit() {
675 switch (hw_class) {
676 case HW_CLASS_R600:return HB_R6;
677 case HW_CLASS_R700:return HB_R7;
678 case HW_CLASS_EVERGREEN:return HB_EG;
679 case HW_CLASS_CAYMAN:return HB_CM;
680 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
681
682 }
683 }
684
685 unsigned cf_opcode(unsigned op) {
686 return r600_isa_cf_opcode(isa->hw_class, op);
687 }
688
689 unsigned alu_opcode(unsigned op) {
690 return r600_isa_alu_opcode(isa->hw_class, op);
691 }
692
693 unsigned alu_slots(unsigned op) {
694 return r600_isa_alu_slots(isa->hw_class, op);
695 }
696
697 unsigned alu_slots(const alu_op_info * op_ptr) {
698 return op_ptr->slots[isa->hw_class];
699 }
700
701 unsigned alu_slots_mask(const alu_op_info * op_ptr) {
702 unsigned mask = 0;
703 unsigned slot_flags = alu_slots(op_ptr);
704 if (slot_flags & AF_V)
705 mask = 0x0F;
706 if (!is_cayman() && (slot_flags & AF_S))
707 mask |= 0x10;
708 return mask;
709 }
710
711 unsigned fetch_opcode(unsigned op) {
712 return r600_isa_fetch_opcode(isa->hw_class, op);
713 }
714
715 bool is_kcache_sel(unsigned sel) {
716 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
717 }
718
719 const char * get_hw_class_name();
720 const char * get_hw_chip_name();
721
722 };
723
// Execute the statement(s) `a` only when the matching static sb_context
// flag (dump_stat / dump_pass) is non-zero. The do { } while (0) wrapper
// lets each macro use be followed by a semicolon like an ordinary statement.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
726
727 class bc_decoder {
728
729 sb_context &ctx;
730
731 uint32_t* dw;
732 unsigned ndw;
733
734 public:
735
736 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
737 : ctx(sctx), dw(data), ndw(size) {}
738
739 int decode_cf(unsigned &i, bc_cf &bc);
740 int decode_alu(unsigned &i, bc_alu &bc);
741 int decode_fetch(unsigned &i, bc_fetch &bc);
742
743 private:
744 int decode_cf_alu(unsigned &i, bc_cf &bc);
745 int decode_cf_exp(unsigned &i, bc_cf &bc);
746 int decode_cf_mem(unsigned &i, bc_cf &bc);
747
748 int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
749 int decode_fetch_gds(unsigned &i, bc_fetch &bc);
750 };
751
752 // bytecode format definition
753
754 class hw_encoding_format {
755 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
756 hw_encoding_format();
757 protected:
758 uint32_t value;
759 public:
760 hw_encoding_format(sb_hw_class_bits hw)
761 : hw_target(hw), value(0) {}
762 hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
763 : hw_target(hw), value(v) {}
764 uint32_t get_value(sb_hw_class_bits hw) const {
765 assert((hw & hw_target) == hw);
766 return value;
767 }
768 };
769
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens a class named fmt_hwset derived from
// hw_encoding_format, with a default constructor and a constructor taking an
// initial 32-bit value; both pass HB_<hwset> as the hardware set.
// `thistype` is the return type of the setters generated by BC_FIELD.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
	class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
	public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Same as BC_FORMAT_BEGIN_HW with the hardware set fixed to ALL.
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

// Closes the class opened by BC_FORMAT_BEGIN / BC_FORMAT_BEGIN_HW.
#define BC_FORMAT_END(fmt) };

// bytecode format field definition

// BC_FIELD generates a chainable setter name(v) and a getter get_name() for
// bits [first_bit, last_bit] of `value`. The setter masks v to the field
// width and ORs it in; it does not clear bits that are already set.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
	value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
	return *this; \
	} \
	unsigned get_##name() const { \
	return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	} \

// Reserved bit ranges expand to nothing.
#define BC_RSRVD(fmt, last_bit, first_bit)

// CLAMP macro defined elsewhere interferes with bytecode field name
#undef CLAMP
#include "sb_bc_fmt_def.inc"

// The helper macros are removed again once the format definitions have been
// included (BC_FORMAT_BEGIN_HW is not among the macros undefined here).
#undef BC_FORMAT_BEGIN
#undef BC_FORMAT_END
#undef BC_FIELD
#undef BC_RSRVD
802
803 class bc_parser {
804 sb_context & ctx;
805
806 bc_decoder *dec;
807
808 r600_bytecode *bc;
809 r600_shader *pshader;
810
811 uint32_t *dw;
812 unsigned bc_ndw;
813
814 unsigned max_cf;
815
816 shader *sh;
817
818 int error;
819
820 alu_node *slots[2][5];
821 unsigned cgroup;
822
823 typedef std::vector<cf_node*> id_cf_map;
824 id_cf_map cf_map;
825
826 typedef std::stack<region_node*> region_stack;
827 region_stack loop_stack;
828
829 bool gpr_reladdr;
830
831 // Note: currently relies on input emitting SET_CF in same basic block as uses
832 value *cf_index_value[2];
833 alu_node *mova;
834 public:
835
836 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
837 ctx(sctx), dec(), bc(bc), pshader(pshader),
838 dw(), bc_ndw(), max_cf(),
839 sh(), error(), slots(), cgroup(),
840 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
841
842 int decode();
843 int prepare();
844
845 shader* get_shader() { assert(!error); return sh; }
846
847 private:
848
849 int decode_shader();
850
851 int parse_decls();
852
853 int decode_cf(unsigned &i, bool &eop);
854
855 int decode_alu_clause(cf_node *cf);
856 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
857
858 int decode_fetch_clause(cf_node *cf);
859
860 int prepare_ir();
861 int prepare_alu_clause(cf_node *cf);
862 int prepare_alu_group(cf_node* cf, alu_group_node *g);
863 int prepare_fetch_clause(cf_node *cf);
864
865 int prepare_loop(cf_node *c);
866 int prepare_if(cf_node *c);
867
868 void save_set_cf_index(value *val, unsigned idx);
869 value *get_cf_index_value(unsigned idx);
870 void save_mova(alu_node *mova);
871 alu_node *get_mova();
872 };
873
874
875
876
877 class bytecode {
878 typedef std::vector<uint32_t> bc_vector;
879 sb_hw_class_bits hw_class_bit;
880
881 bc_vector bc;
882
883 unsigned pos;
884
885 public:
886
887 bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
888 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
889
890 unsigned ndw() { return bc.size(); }
891
892 void write_data(uint32_t* dst) {
893 std::copy(bc.begin(), bc.end(), dst);
894 }
895
896 void align(unsigned a) {
897 unsigned size = bc.size();
898 size = (size + a - 1) & ~(a-1);
899 bc.resize(size);
900 }
901
902 void set_size(unsigned sz) {
903 assert(sz >= bc.size());
904 bc.resize(sz);
905 }
906
907 void seek(unsigned p) {
908 if (p != pos) {
909 if (p > bc.size()) {
910 bc.resize(p);
911 }
912 pos = p;
913 }
914 }
915
916 unsigned get_pos() { return pos; }
917 uint32_t *data() { return &bc[0]; }
918
919 bytecode & operator <<(uint32_t v) {
920 if (pos == ndw()) {
921 bc.push_back(v);
922 } else
923 bc.at(pos) = v;
924 ++pos;
925 return *this;
926 }
927
928 bytecode & operator <<(const hw_encoding_format &e) {
929 *this << e.get_value(hw_class_bit);
930 return *this;
931 }
932
933 bytecode & operator <<(const bytecode &b) {
934 bc.insert(bc.end(), b.bc.begin(), b.bc.end());
935 return *this;
936 }
937
938 uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
939 };
940
941
942 class bc_builder {
943 shader &sh;
944 sb_context &ctx;
945 bytecode bb;
946 int error;
947
948 public:
949
950 bc_builder(shader &s);
951 int build();
952 bytecode& get_bytecode() { assert(!error); return bb; }
953
954 private:
955
956 int build_cf(cf_node *n);
957
958 int build_cf_alu(cf_node *n);
959 int build_cf_mem(cf_node *n);
960 int build_cf_exp(cf_node *n);
961
962 int build_alu_clause(cf_node *n);
963 int build_alu_group(alu_group_node *n);
964 int build_alu(alu_node *n);
965
966 int build_fetch_clause(cf_node *n);
967 int build_fetch_tex(fetch_node *n);
968 int build_fetch_vtx(fetch_node *n);
969 };
970
971 } // namespace r600_sb
972
973 #endif /* SB_BC_H_ */