2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "util/u_format.h"
26 #include "util/u_memory.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "r600_pipe.h"
30 #include "r600_opcodes.h"
32 #include "r600_formats.h"
35 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu
*alu
)
41 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
:
43 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
:
44 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE
:
45 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
:
46 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE
:
47 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE
:
48 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
:
49 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
:
50 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
:
51 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
:
52 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
:
53 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
:
54 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
:
55 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE
:
56 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT
:
57 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE
:
58 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE
:
59 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
:
60 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE
:
61 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE
:
64 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
:
65 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR
:
66 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
:
67 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
:
68 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
:
69 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
:
70 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
:
71 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
:
72 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
:
73 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
:
74 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
:
75 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
:
76 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
:
79 "Need instruction operand number for 0x%x.\n", alu
->inst
);
85 int r700_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
);
87 static struct r600_bc_cf
*r600_bc_cf(void)
89 struct r600_bc_cf
*cf
= CALLOC_STRUCT(r600_bc_cf
);
93 LIST_INITHEAD(&cf
->list
);
94 LIST_INITHEAD(&cf
->alu
);
95 LIST_INITHEAD(&cf
->vtx
);
96 LIST_INITHEAD(&cf
->tex
);
100 static struct r600_bc_alu
*r600_bc_alu(void)
102 struct r600_bc_alu
*alu
= CALLOC_STRUCT(r600_bc_alu
);
106 LIST_INITHEAD(&alu
->list
);
107 LIST_INITHEAD(&alu
->bs_list
);
111 static struct r600_bc_vtx
*r600_bc_vtx(void)
113 struct r600_bc_vtx
*vtx
= CALLOC_STRUCT(r600_bc_vtx
);
117 LIST_INITHEAD(&vtx
->list
);
121 static struct r600_bc_tex
*r600_bc_tex(void)
123 struct r600_bc_tex
*tex
= CALLOC_STRUCT(r600_bc_tex
);
127 LIST_INITHEAD(&tex
->list
);
131 int r600_bc_init(struct r600_bc
*bc
, enum radeon_family family
)
133 LIST_INITHEAD(&bc
->cf
);
135 switch (bc
->family
) {
144 bc
->chiprev
= CHIPREV_R600
;
150 bc
->chiprev
= CHIPREV_R700
;
161 bc
->chiprev
= CHIPREV_EVERGREEN
;
164 R600_ERR("unknown family %d\n", bc
->family
);
170 static int r600_bc_add_cf(struct r600_bc
*bc
)
172 struct r600_bc_cf
*cf
= r600_bc_cf();
176 LIST_ADDTAIL(&cf
->list
, &bc
->cf
);
178 cf
->id
= bc
->cf_last
->id
+ 2;
182 bc
->force_add_cf
= 0;
186 int r600_bc_add_output(struct r600_bc
*bc
, const struct r600_bc_output
*output
)
190 r
= r600_bc_add_cf(bc
);
193 bc
->cf_last
->inst
= output
->inst
;
194 memcpy(&bc
->cf_last
->output
, output
, sizeof(struct r600_bc_output
));
/* Bank swizzle for vector ALU slots, indexed by a 3-bit constant-usage key:
 * bit 2 = src0 is a constant, bit 1 = src1, bit 0 = src2
 * (key computed in check_vector()). */
const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210,  /* 000 */
				      SQ_ALU_VEC_120,  /* 001 */
				      SQ_ALU_VEC_102,  /* 010 */
				      SQ_ALU_VEC_201,  /* 011 */
				      SQ_ALU_VEC_012,  /* 100 */
				      SQ_ALU_VEC_021,  /* 101 */
				      SQ_ALU_VEC_012,  /* 110 */
				      SQ_ALU_VEC_012}; /* 111 */
/* Bank swizzle for the scalar (trans) ALU slot, indexed by the same 3-bit
 * constant-usage key as bank_swizzle_vec (see check_scalar()). */
const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210,  /* 000 */
				      SQ_ALU_SCL_122,  /* 001 */
				      SQ_ALU_SCL_122,  /* 010 */
				      SQ_ALU_SCL_221,  /* 011 */
				      SQ_ALU_SCL_212,  /* 100 */
				      SQ_ALU_SCL_122,  /* 101 */
				      SQ_ALU_SCL_122,  /* 110 */
				      SQ_ALU_SCL_122}; /* 111 */
220 static int init_gpr(struct r600_bc_alu
*alu
)
222 int cycle
, component
;
224 for (cycle
= 0; cycle
< NUM_OF_CYCLES
; cycle
++)
225 for (component
= 0; component
< NUM_OF_COMPONENTS
; component
++)
226 alu
->hw_gpr
[cycle
][component
] = -1;
231 static int reserve_gpr(struct r600_bc_alu
*alu
, unsigned sel
, unsigned chan
, unsigned cycle
)
233 if (alu
->hw_gpr
[cycle
][chan
] < 0)
234 alu
->hw_gpr
[cycle
][chan
] = sel
;
235 else if (alu
->hw_gpr
[cycle
][chan
] != (int)sel
) {
236 R600_ERR("Another scalar operation has already used GPR read port for channel\n");
242 static int cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, unsigned *p_cycle
)
248 table
[0] = 2; table
[1] = 1; table
[2] = 0;
249 *p_cycle
= table
[sel
];
252 table
[0] = 1; table
[1] = 2; table
[2] = 2;
253 *p_cycle
= table
[sel
];
256 table
[0] = 2; table
[1] = 1; table
[2] = 2;
257 *p_cycle
= table
[sel
];
260 table
[0] = 2; table
[1] = 2; table
[2] = 1;
261 *p_cycle
= table
[sel
];
265 R600_ERR("bad scalar bank swizzle value\n");
272 static int cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, unsigned *p_cycle
)
279 table
[0] = 0; table
[1] = 1; table
[2] = 2;
280 *p_cycle
= table
[sel
];
283 table
[0] = 0; table
[1] = 2; table
[2] = 1;
284 *p_cycle
= table
[sel
];
287 table
[0] = 1; table
[1] = 2; table
[2] = 0;
288 *p_cycle
= table
[sel
];
291 table
[0] = 1; table
[1] = 0; table
[2] = 2;
292 *p_cycle
= table
[sel
];
295 table
[0] = 2; table
[1] = 0; table
[2] = 1;
296 *p_cycle
= table
[sel
];
299 table
[0] = 2; table
[1] = 1; table
[2] = 0;
300 *p_cycle
= table
[sel
];
303 R600_ERR("bad vector bank swizzle value\n");
312 static void update_chan_counter(struct r600_bc_alu
*alu
, int *chan_counter
)
318 num_src
= r600_bc_get_num_operands(alu
);
320 for (i
= 0; i
< num_src
; i
++) {
321 channel_swizzle
= alu
->src
[i
].chan
;
322 if ((alu
->src
[i
].sel
> 0 && alu
->src
[i
].sel
< 128) && channel_swizzle
<= 3)
323 chan_counter
[channel_swizzle
]++;
327 /* we need something like this I think - but this is bogus */
328 int check_read_slots(struct r600_bc
*bc
, struct r600_bc_alu
*alu_first
)
330 struct r600_bc_alu
*alu
;
331 int chan_counter
[4] = { 0 };
333 update_chan_counter(alu_first
, chan_counter
);
335 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
336 update_chan_counter(alu
, chan_counter
);
339 if (chan_counter
[0] > 3 ||
340 chan_counter
[1] > 3 ||
341 chan_counter
[2] > 3 ||
342 chan_counter
[3] > 3) {
343 R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
344 alu_first
->inst
, chan_counter
[0], chan_counter
[1], chan_counter
[2], chan_counter
[3]);
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required? */
/* Returns 1 if sel addresses a constant-buffer constant, else 0.
 * NOTE(review): returns reconstructed — lost in extraction. */
static int is_const(int sel)
{
	if (sel > 511 && sel < 4607)
		return 1;
	return 0;
}
361 static int check_scalar(struct r600_bc
*bc
, struct r600_bc_alu
*alu
)
363 unsigned swizzle_key
;
365 if (alu
->bank_swizzle_force
) {
366 alu
->bank_swizzle
= alu
->bank_swizzle_force
;
369 swizzle_key
= (is_const(alu
->src
[0].sel
) ? 4 : 0 ) +
370 (is_const(alu
->src
[1].sel
) ? 2 : 0 ) +
371 (is_const(alu
->src
[2].sel
) ? 1 : 0 );
373 alu
->bank_swizzle
= bank_swizzle_scl
[swizzle_key
];
377 static int check_vector(struct r600_bc
*bc
, struct r600_bc_alu
*alu
)
379 unsigned swizzle_key
;
381 if (alu
->bank_swizzle_force
) {
382 alu
->bank_swizzle
= alu
->bank_swizzle_force
;
385 swizzle_key
= (is_const(alu
->src
[0].sel
) ? 4 : 0 ) +
386 (is_const(alu
->src
[1].sel
) ? 2 : 0 ) +
387 (is_const(alu
->src
[2].sel
) ? 1 : 0 );
389 alu
->bank_swizzle
= bank_swizzle_vec
[swizzle_key
];
393 static int check_and_set_bank_swizzle(struct r600_bc
*bc
, struct r600_bc_alu
*alu_first
)
395 struct r600_bc_alu
*alu
= NULL
;
400 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
404 if (num_instr
== 1) {
405 check_scalar(bc
, alu_first
);
408 /* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
409 check_vector(bc
, alu_first
);
410 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
411 check_vector(bc
, alu
);
417 /* This code handles kcache lines as single blocks of 32 constants. We could
418 * probably do slightly better by recognizing that we actually have two
419 * consecutive lines of 16 constants, but the resulting code would also be
420 * somewhat more complicated. */
421 static int r600_bc_alloc_kcache_lines(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, int type
)
423 struct r600_bc_kcache
*kcache
= bc
->cf_last
->kcache
;
424 unsigned int required_lines
;
425 unsigned int free_lines
= 0;
426 unsigned int cache_line
[3];
427 unsigned int count
= 0;
431 /* Collect required cache lines. */
432 for (i
= 0; i
< 3; ++i
) {
436 if (alu
->src
[i
].sel
< 512)
439 line
= ((alu
->src
[i
].sel
- 512) / 32) * 2;
441 for (j
= 0; j
< count
; ++j
) {
442 if (cache_line
[j
] == line
) {
449 cache_line
[count
++] = line
;
452 /* This should never actually happen. */
453 if (count
>= 3) return -ENOMEM
;
455 for (i
= 0; i
< 2; ++i
) {
456 if (kcache
[i
].mode
== V_SQ_CF_KCACHE_NOP
) {
461 /* Filter lines pulled in by previous intructions. Note that this is
462 * only for the required_lines count, we can't remove these from the
463 * cache_line array since we may have to start a new ALU clause. */
464 for (i
= 0, required_lines
= count
; i
< count
; ++i
) {
465 for (j
= 0; j
< 2; ++j
) {
466 if (kcache
[j
].mode
== V_SQ_CF_KCACHE_LOCK_2
&&
467 kcache
[j
].addr
== cache_line
[i
]) {
474 /* Start a new ALU clause if needed. */
475 if (required_lines
> free_lines
) {
476 if ((r
= r600_bc_add_cf(bc
))) {
479 bc
->cf_last
->inst
= (type
<< 3);
480 kcache
= bc
->cf_last
->kcache
;
483 /* Setup the kcache lines. */
484 for (i
= 0; i
< count
; ++i
) {
487 for (j
= 0; j
< 2; ++j
) {
488 if (kcache
[j
].mode
== V_SQ_CF_KCACHE_LOCK_2
&&
489 kcache
[j
].addr
== cache_line
[i
]) {
497 for (j
= 0; j
< 2; ++j
) {
498 if (kcache
[j
].mode
== V_SQ_CF_KCACHE_NOP
) {
500 kcache
[j
].addr
= cache_line
[i
];
501 kcache
[j
].mode
= V_SQ_CF_KCACHE_LOCK_2
;
507 /* Alter the src operands to refer to the kcache. */
508 for (i
= 0; i
< 3; ++i
) {
509 static const unsigned int base
[] = {128, 160, 256, 288};
512 if (alu
->src
[i
].sel
< 512)
515 alu
->src
[i
].sel
-= 512;
516 line
= (alu
->src
[i
].sel
/ 32) * 2;
518 for (j
= 0; j
< 2; ++j
) {
519 if (kcache
[j
].mode
== V_SQ_CF_KCACHE_LOCK_2
&&
520 kcache
[j
].addr
== line
) {
521 alu
->src
[i
].sel
&= 0x1f;
522 alu
->src
[i
].sel
+= base
[j
];
531 int r600_bc_add_alu_type(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
, int type
)
533 struct r600_bc_alu
*nalu
= r600_bc_alu();
534 struct r600_bc_alu
*lalu
;
539 memcpy(nalu
, alu
, sizeof(struct r600_bc_alu
));
542 /* cf can contains only alu or only vtx or only tex */
543 if (bc
->cf_last
== NULL
|| bc
->cf_last
->inst
!= (type
<< 3) ||
545 r
= r600_bc_add_cf(bc
);
550 bc
->cf_last
->inst
= (type
<< 3);
553 /* Setup the kcache for this ALU instruction. This will start a new
554 * ALU clause if needed. */
555 if ((r
= r600_bc_alloc_kcache_lines(bc
, nalu
, type
))) {
560 if (!bc
->cf_last
->curr_bs_head
) {
561 bc
->cf_last
->curr_bs_head
= nalu
;
562 LIST_INITHEAD(&nalu
->bs_list
);
564 LIST_ADDTAIL(&nalu
->bs_list
, &bc
->cf_last
->curr_bs_head
->bs_list
);
566 /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
568 if (nalu
->last
&& (bc
->cf_last
->ndw
>> 1) >= 120) {
569 bc
->force_add_cf
= 1;
571 /* number of gpr == the last gpr used in any alu */
572 for (i
= 0; i
< 3; i
++) {
573 if (nalu
->src
[i
].sel
>= bc
->ngpr
&& nalu
->src
[i
].sel
< 128) {
574 bc
->ngpr
= nalu
->src
[i
].sel
+ 1;
576 /* compute how many literal are needed
577 * either 2 or 4 literals
579 if (nalu
->src
[i
].sel
== 253) {
580 if (((nalu
->src
[i
].chan
+ 2) & 0x6) > nalu
->nliteral
) {
581 nalu
->nliteral
= (nalu
->src
[i
].chan
+ 2) & 0x6;
585 if (!LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
586 lalu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
587 if (!lalu
->last
&& lalu
->nliteral
> nalu
->nliteral
) {
588 nalu
->nliteral
= lalu
->nliteral
;
591 if (nalu
->dst
.sel
>= bc
->ngpr
) {
592 bc
->ngpr
= nalu
->dst
.sel
+ 1;
594 LIST_ADDTAIL(&nalu
->list
, &bc
->cf_last
->alu
);
595 /* each alu use 2 dwords */
596 bc
->cf_last
->ndw
+= 2;
599 /* process cur ALU instructions for bank swizzle */
601 check_and_set_bank_swizzle(bc
, bc
->cf_last
->curr_bs_head
);
602 bc
->cf_last
->curr_bs_head
= NULL
;
607 int r600_bc_add_alu(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
)
609 return r600_bc_add_alu_type(bc
, alu
, BC_INST(bc
, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
));
612 int r600_bc_add_literal(struct r600_bc
*bc
, const u32
*value
)
614 struct r600_bc_alu
*alu
;
616 if (bc
->cf_last
== NULL
) {
619 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_TEX
) {
623 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_JUMP
||
624 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_ELSE
||
625 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
||
626 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
||
627 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
||
628 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
||
629 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_POP
) {
633 if (((bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3)) &&
634 (bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3))) ||
635 LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
636 R600_ERR("last CF is not ALU (%p)\n", bc
->cf_last
);
639 alu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
640 if (!alu
->last
|| !alu
->nliteral
|| alu
->literal_added
) {
643 memcpy(alu
->value
, value
, 4 * 4);
644 bc
->cf_last
->ndw
+= alu
->nliteral
;
645 bc
->ndw
+= alu
->nliteral
;
646 alu
->literal_added
= 1;
650 int r600_bc_add_vtx(struct r600_bc
*bc
, const struct r600_bc_vtx
*vtx
)
652 struct r600_bc_vtx
*nvtx
= r600_bc_vtx();
657 memcpy(nvtx
, vtx
, sizeof(struct r600_bc_vtx
));
659 /* cf can contains only alu or only vtx or only tex */
660 if (bc
->cf_last
== NULL
||
661 (bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX
&&
662 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
) ||
664 r
= r600_bc_add_cf(bc
);
669 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_VTX
;
671 LIST_ADDTAIL(&nvtx
->list
, &bc
->cf_last
->vtx
);
672 /* each fetch use 4 dwords */
673 bc
->cf_last
->ndw
+= 4;
675 if ((bc
->ndw
/ 4) > 7)
676 bc
->force_add_cf
= 1;
680 int r600_bc_add_tex(struct r600_bc
*bc
, const struct r600_bc_tex
*tex
)
682 struct r600_bc_tex
*ntex
= r600_bc_tex();
687 memcpy(ntex
, tex
, sizeof(struct r600_bc_tex
));
689 /* cf can contains only alu or only vtx or only tex */
690 if (bc
->cf_last
== NULL
||
691 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_TEX
||
693 r
= r600_bc_add_cf(bc
);
698 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_TEX
;
700 LIST_ADDTAIL(&ntex
->list
, &bc
->cf_last
->tex
);
701 /* each texture fetch use 4 dwords */
702 bc
->cf_last
->ndw
+= 4;
704 if ((bc
->ndw
/ 4) > 7)
705 bc
->force_add_cf
= 1;
709 int r600_bc_add_cfinst(struct r600_bc
*bc
, int inst
)
712 r
= r600_bc_add_cf(bc
);
716 bc
->cf_last
->cond
= V_SQ_CF_COND_ACTIVE
;
717 bc
->cf_last
->inst
= inst
;
721 /* common to all 3 families */
722 static int r600_bc_vtx_build(struct r600_bc
*bc
, struct r600_bc_vtx
*vtx
, unsigned id
)
724 unsigned fetch_resource_start
= 0;
726 /* check if we are fetch shader */
727 /* fetch shader can also access vertex resource,
728 * first fetch shader resource is at 160
730 if (bc
->type
== -1) {
731 switch (bc
->chiprev
) {
736 fetch_resource_start
= 160;
739 case CHIPREV_EVERGREEN
:
740 fetch_resource_start
= 0;
743 fprintf(stderr
, "%s:%s:%d unknown chiprev %d\n",
744 __FILE__
, __func__
, __LINE__
, bc
->chiprev
);
748 bc
->bytecode
[id
++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx
->buffer_id
+ fetch_resource_start
) |
749 S_SQ_VTX_WORD0_SRC_GPR(vtx
->src_gpr
) |
750 S_SQ_VTX_WORD0_SRC_SEL_X(vtx
->src_sel_x
) |
751 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx
->mega_fetch_count
);
752 bc
->bytecode
[id
++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx
->dst_sel_x
) |
753 S_SQ_VTX_WORD1_DST_SEL_Y(vtx
->dst_sel_y
) |
754 S_SQ_VTX_WORD1_DST_SEL_Z(vtx
->dst_sel_z
) |
755 S_SQ_VTX_WORD1_DST_SEL_W(vtx
->dst_sel_w
) |
756 S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx
->use_const_fields
) |
757 S_SQ_VTX_WORD1_DATA_FORMAT(vtx
->data_format
) |
758 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx
->num_format_all
) |
759 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx
->format_comp_all
) |
760 S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx
->srf_mode_all
) |
761 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx
->dst_gpr
);
762 bc
->bytecode
[id
++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
763 bc
->bytecode
[id
++] = 0;
767 /* common to all 3 families */
768 static int r600_bc_tex_build(struct r600_bc
*bc
, struct r600_bc_tex
*tex
, unsigned id
)
770 bc
->bytecode
[id
++] = S_SQ_TEX_WORD0_TEX_INST(tex
->inst
) |
771 S_SQ_TEX_WORD0_RESOURCE_ID(tex
->resource_id
) |
772 S_SQ_TEX_WORD0_SRC_GPR(tex
->src_gpr
) |
773 S_SQ_TEX_WORD0_SRC_REL(tex
->src_rel
);
774 bc
->bytecode
[id
++] = S_SQ_TEX_WORD1_DST_GPR(tex
->dst_gpr
) |
775 S_SQ_TEX_WORD1_DST_REL(tex
->dst_rel
) |
776 S_SQ_TEX_WORD1_DST_SEL_X(tex
->dst_sel_x
) |
777 S_SQ_TEX_WORD1_DST_SEL_Y(tex
->dst_sel_y
) |
778 S_SQ_TEX_WORD1_DST_SEL_Z(tex
->dst_sel_z
) |
779 S_SQ_TEX_WORD1_DST_SEL_W(tex
->dst_sel_w
) |
780 S_SQ_TEX_WORD1_LOD_BIAS(tex
->lod_bias
) |
781 S_SQ_TEX_WORD1_COORD_TYPE_X(tex
->coord_type_x
) |
782 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex
->coord_type_y
) |
783 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex
->coord_type_z
) |
784 S_SQ_TEX_WORD1_COORD_TYPE_W(tex
->coord_type_w
);
785 bc
->bytecode
[id
++] = S_SQ_TEX_WORD2_OFFSET_X(tex
->offset_x
) |
786 S_SQ_TEX_WORD2_OFFSET_Y(tex
->offset_y
) |
787 S_SQ_TEX_WORD2_OFFSET_Z(tex
->offset_z
) |
788 S_SQ_TEX_WORD2_SAMPLER_ID(tex
->sampler_id
) |
789 S_SQ_TEX_WORD2_SRC_SEL_X(tex
->src_sel_x
) |
790 S_SQ_TEX_WORD2_SRC_SEL_Y(tex
->src_sel_y
) |
791 S_SQ_TEX_WORD2_SRC_SEL_Z(tex
->src_sel_z
) |
792 S_SQ_TEX_WORD2_SRC_SEL_W(tex
->src_sel_w
);
793 bc
->bytecode
[id
++] = 0;
797 /* r600 only, r700/eg bits in r700_asm.c */
798 static int r600_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
)
802 /* don't replace gpr by pv or ps for destination register */
803 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
804 S_SQ_ALU_WORD0_SRC0_REL(alu
->src
[0].rel
) |
805 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
806 S_SQ_ALU_WORD0_SRC0_NEG(alu
->src
[0].neg
) |
807 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
808 S_SQ_ALU_WORD0_SRC1_REL(alu
->src
[1].rel
) |
809 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
810 S_SQ_ALU_WORD0_SRC1_NEG(alu
->src
[1].neg
) |
811 S_SQ_ALU_WORD0_LAST(alu
->last
);
814 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
815 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
816 S_SQ_ALU_WORD1_DST_REL(alu
->dst
.rel
) |
817 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
818 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu
->src
[2].sel
) |
819 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu
->src
[2].rel
) |
820 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu
->src
[2].chan
) |
821 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu
->src
[2].neg
) |
822 S_SQ_ALU_WORD1_OP3_ALU_INST(alu
->inst
) |
823 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu
->bank_swizzle
);
825 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
826 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
827 S_SQ_ALU_WORD1_DST_REL(alu
->dst
.rel
) |
828 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
829 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu
->src
[0].abs
) |
830 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu
->src
[1].abs
) |
831 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu
->dst
.write
) |
832 S_SQ_ALU_WORD1_OP2_ALU_INST(alu
->inst
) |
833 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu
->bank_swizzle
) |
834 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu
->predicate
) |
835 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu
->predicate
);
838 if (alu
->nliteral
&& !alu
->literal_added
) {
839 R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu
->inst
);
841 for (i
= 0; i
< alu
->nliteral
; i
++) {
842 bc
->bytecode
[id
++] = alu
->value
[i
];
848 /* common for r600/r700 - eg in eg_asm.c */
849 static int r600_bc_cf_build(struct r600_bc
*bc
, struct r600_bc_cf
*cf
)
851 unsigned id
= cf
->id
;
854 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
855 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
856 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD0_ADDR(cf
->addr
>> 1) |
857 S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf
->kcache
[0].mode
) |
858 S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf
->kcache
[0].bank
) |
859 S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf
->kcache
[1].bank
);
861 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD1_CF_INST(cf
->inst
>> 3) |
862 S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf
->kcache
[1].mode
) |
863 S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf
->kcache
[0].addr
) |
864 S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf
->kcache
[1].addr
) |
865 S_SQ_CF_ALU_WORD1_BARRIER(1) |
866 S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc
->chiprev
== CHIPREV_R600
? cf
->r6xx_uses_waterfall
: 0) |
867 S_SQ_CF_ALU_WORD1_COUNT((cf
->ndw
/ 2) - 1);
869 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
870 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
871 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
872 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->addr
>> 1);
873 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
874 S_SQ_CF_WORD1_BARRIER(1) |
875 S_SQ_CF_WORD1_COUNT((cf
->ndw
/ 4) - 1);
877 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
878 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
879 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf
->output
.gpr
) |
880 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf
->output
.elem_size
) |
881 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf
->output
.array_base
) |
882 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf
->output
.type
);
883 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf
->output
.swizzle_x
) |
884 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf
->output
.swizzle_y
) |
885 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf
->output
.swizzle_z
) |
886 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf
->output
.swizzle_w
) |
887 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf
->output
.barrier
) |
888 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf
->output
.inst
) |
889 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf
->output
.end_of_program
);
891 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
892 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
893 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
894 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
895 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
896 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
897 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
898 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
:
899 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN
:
900 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->cf_addr
>> 1);
901 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
902 S_SQ_CF_WORD1_BARRIER(1) |
903 S_SQ_CF_WORD1_COND(cf
->cond
) |
904 S_SQ_CF_WORD1_POP_COUNT(cf
->pop_count
);
908 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
914 int r600_bc_build(struct r600_bc
*bc
)
916 struct r600_bc_cf
*cf
;
917 struct r600_bc_alu
*alu
;
918 struct r600_bc_vtx
*vtx
;
919 struct r600_bc_tex
*tex
;
923 if (bc
->callstack
[0].max
> 0)
924 bc
->nstack
= ((bc
->callstack
[0].max
+ 3) >> 2) + 2;
925 if (bc
->type
== TGSI_PROCESSOR_VERTEX
&& !bc
->nstack
) {
929 /* first path compute addr of each CF block */
930 /* addr start after all the CF instructions */
931 addr
= bc
->cf_last
->id
+ 2;
932 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
934 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
935 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
937 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
938 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
939 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
940 /* fetch node need to be 16 bytes aligned*/
942 addr
&= 0xFFFFFFFCUL
;
944 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
945 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
946 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
947 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
949 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
950 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
951 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
952 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
953 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
954 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
955 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
956 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
:
957 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN
:
960 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
965 bc
->ndw
= cf
->addr
+ cf
->ndw
;
968 bc
->bytecode
= calloc(1, bc
->ndw
* 4);
969 if (bc
->bytecode
== NULL
)
971 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
973 if (bc
->chiprev
== CHIPREV_EVERGREEN
)
974 r
= eg_bc_cf_build(bc
, cf
);
976 r
= r600_bc_cf_build(bc
, cf
);
980 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
981 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
982 LIST_FOR_EACH_ENTRY(alu
, &cf
->alu
, list
) {
983 switch(bc
->chiprev
) {
985 r
= r600_bc_alu_build(bc
, alu
, addr
);
988 case CHIPREV_EVERGREEN
: /* eg alu is same encoding as r700 */
989 r
= r700_bc_alu_build(bc
, alu
, addr
);
992 R600_ERR("unknown family %d\n", bc
->family
);
999 addr
+= alu
->nliteral
;
1003 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
1004 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
1005 LIST_FOR_EACH_ENTRY(vtx
, &cf
->vtx
, list
) {
1006 r
= r600_bc_vtx_build(bc
, vtx
, addr
);
1012 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
1013 LIST_FOR_EACH_ENTRY(tex
, &cf
->tex
, list
) {
1014 r
= r600_bc_tex_build(bc
, tex
, addr
);
1020 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
1021 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
1022 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
1023 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
1024 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
1025 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
1026 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
1027 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
1028 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
1029 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
1030 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
1031 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS
:
1032 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN
:
1035 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
1042 void r600_bc_clear(struct r600_bc
*bc
)
1044 struct r600_bc_cf
*cf
= NULL
, *next_cf
;
1047 bc
->bytecode
= NULL
;
1049 LIST_FOR_EACH_ENTRY_SAFE(cf
, next_cf
, &bc
->cf
, list
) {
1050 struct r600_bc_alu
*alu
= NULL
, *next_alu
;
1051 struct r600_bc_tex
*tex
= NULL
, *next_tex
;
1052 struct r600_bc_tex
*vtx
= NULL
, *next_vtx
;
1054 LIST_FOR_EACH_ENTRY_SAFE(alu
, next_alu
, &cf
->alu
, list
) {
1058 LIST_INITHEAD(&cf
->alu
);
1060 LIST_FOR_EACH_ENTRY_SAFE(tex
, next_tex
, &cf
->tex
, list
) {
1064 LIST_INITHEAD(&cf
->tex
);
1066 LIST_FOR_EACH_ENTRY_SAFE(vtx
, next_vtx
, &cf
->vtx
, list
) {
1070 LIST_INITHEAD(&cf
->vtx
);
1075 LIST_INITHEAD(&cf
->list
);
1078 void r600_bc_dump(struct r600_bc
*bc
)
1083 switch (bc
->chiprev
) {
1095 fprintf(stderr
, "bytecode %d dw -----------------------\n", bc
->ndw
);
1096 fprintf(stderr
, " %c\n", chip
);
1097 for (i
= 0; i
< bc
->ndw
; i
++) {
1098 fprintf(stderr
, "0x%08X\n", bc
->bytecode
[i
]);
1100 fprintf(stderr
, "--------------------------------------\n");
1103 void r600_cf_vtx(struct r600_vertex_element
*ve
, u32
*bytecode
, unsigned count
)
1105 struct r600_pipe_state
*rstate
;
1109 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1110 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX
) |
1111 S_SQ_CF_WORD1_BARRIER(1) |
1112 S_SQ_CF_WORD1_COUNT(8 - 1);
1113 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
1114 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX
) |
1115 S_SQ_CF_WORD1_BARRIER(1) |
1116 S_SQ_CF_WORD1_COUNT(count
- 8 - 1);
1118 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1119 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX
) |
1120 S_SQ_CF_WORD1_BARRIER(1) |
1121 S_SQ_CF_WORD1_COUNT(count
- 1);
1123 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(0);
1124 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN
) |
1125 S_SQ_CF_WORD1_BARRIER(1);
1127 rstate
= &ve
->rstate
;
1128 rstate
->id
= R600_PIPE_STATE_FETCH_SHADER
;
1130 r600_pipe_state_add_reg(rstate
, R_0288A4_SQ_PGM_RESOURCES_FS
,
1131 0x00000000, 0xFFFFFFFF, NULL
);
1132 r600_pipe_state_add_reg(rstate
, R_0288DC_SQ_PGM_CF_OFFSET_FS
,
1133 0x00000000, 0xFFFFFFFF, NULL
);
1134 r600_pipe_state_add_reg(rstate
, R_028894_SQ_PGM_START_FS
,
1135 r600_bo_offset(ve
->fetch_shader
) >> 8,
1136 0xFFFFFFFF, ve
->fetch_shader
);
1139 void r600_cf_vtx_tc(struct r600_vertex_element
*ve
, u32
*bytecode
, unsigned count
)
1141 struct r600_pipe_state
*rstate
;
1145 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1146 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
) |
1147 S_SQ_CF_WORD1_BARRIER(1) |
1148 S_SQ_CF_WORD1_COUNT(8 - 1);
1149 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
1150 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
) |
1151 S_SQ_CF_WORD1_BARRIER(1) |
1152 S_SQ_CF_WORD1_COUNT((count
- 8) - 1);
1154 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1155 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
) |
1156 S_SQ_CF_WORD1_BARRIER(1) |
1157 S_SQ_CF_WORD1_COUNT(count
- 1);
1159 bytecode
[i
++] = S_SQ_CF_WORD0_ADDR(0);
1160 bytecode
[i
++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN
) |
1161 S_SQ_CF_WORD1_BARRIER(1);
1163 rstate
= &ve
->rstate
;
1164 rstate
->id
= R600_PIPE_STATE_FETCH_SHADER
;
1166 r600_pipe_state_add_reg(rstate
, R_0288A4_SQ_PGM_RESOURCES_FS
,
1167 0x00000000, 0xFFFFFFFF, NULL
);
1168 r600_pipe_state_add_reg(rstate
, R_0288DC_SQ_PGM_CF_OFFSET_FS
,
1169 0x00000000, 0xFFFFFFFF, NULL
);
1170 r600_pipe_state_add_reg(rstate
, R_028894_SQ_PGM_START_FS
,
1171 r600_bo_offset(ve
->fetch_shader
) >> 8,
1172 0xFFFFFFFF, ve
->fetch_shader
);
/*
 * NOTE(review): this region of the file is extraction-damaged: statements are
 * split across lines and the original file's line numbers are fused into the
 * text.  The embedded numbering shows that many original lines are MISSING
 * here (e.g. 1179-1184, 1187-1189, 1193-1196, 1201, 1203, 1205-1237 in part,
 * 1244-1250, 1299-1307), so the code below is an incomplete skeleton of the
 * real function.  Recover the authoritative text from the Mesa r600 driver
 * history before editing; do not treat this as compilable source.
 *
 * Apparent purpose (grounded in the visible tokens): translate a Gallium
 * pipe_format into the R600 hardware vertex-fetch encoding, writing three
 * outputs through the pointer parameters:
 *   *format      - hardware data format (FMT_* constant), chosen by the
 *                  first non-VOID channel's type/size and nr_channels;
 *   *num_format  - numeric format; presumably derived from
 *                  desc->channel[i].normalized (see original line 1301) --
 *                  TODO confirm against upstream;
 *   *format_comp - component signedness; presumably set when the channel
 *                  type is UTIL_FORMAT_TYPE_SIGNED (original line 1298) --
 *                  TODO confirm against upstream.
 * On unsupported formats it reports via R600_ERR (original line 1308).
 */
1175 static void r600_vertex_data_type(enum pipe_format pformat
, unsigned *format
,
1176 unsigned *num_format
, unsigned *format_comp
)
1178 const struct util_format_description
*desc
;
/* NOTE(review): original lines 1179-1184 (local declarations / output
 * initialization, most likely including `unsigned i;`) are missing here. */
1185 desc
= util_format_description(pformat
);
/* Only plainly-laid-out formats are handled; the non-PLAIN branch body
 * (original lines 1187-1189, presumably an early bail-out) is missing. */
1186 if (desc
->layout
!= UTIL_FORMAT_LAYOUT_PLAIN
) {
1190 /* Find the first non-VOID channel. */
1191 for (i
= 0; i
< 4; i
++) {
1192 if (desc
->channel
[i
].type
!= UTIL_FORMAT_TYPE_VOID
) {
/* NOTE(review): loop-exit statement(s) (original lines 1193-1196,
 * presumably a `break;` and closing braces) are missing. */
1197 switch (desc
->channel
[i
].type
) {
1198 /* Half-floats, floats, doubles */
1199 case UTIL_FORMAT_TYPE_FLOAT
:
1200 switch (desc
->channel
[i
].size
) {
/* NOTE(review): the `case 16:` label for the 16-bit float sizes
 * (original line 1201) is missing before this inner switch. */
1202 switch (desc
->nr_channels
) {
/* NOTE(review): the per-channel-count `case`/`break` lines between the
 * FMT_* assignments below are missing throughout this function. */
1204 *format
= FMT_16_FLOAT
;
1207 *format
= FMT_16_16_FLOAT
;
1210 *format
= FMT_16_16_16_FLOAT
;
1213 *format
= FMT_16_16_16_16_FLOAT
;
/* NOTE(review): presumably a `case 32:` label (missing) selects the
 * 32-bit float table below -- confirm against upstream. */
1218 switch (desc
->nr_channels
) {
1220 *format
= FMT_32_FLOAT
;
1223 *format
= FMT_32_32_FLOAT
;
1226 *format
= FMT_32_32_32_FLOAT
;
1229 *format
= FMT_32_32_32_32_FLOAT
;
/* Unsigned and signed integers share one encoding table; signedness is
 * handled separately via *format_comp (see original line 1298 below). */
1238 case UTIL_FORMAT_TYPE_UNSIGNED
:
1240 case UTIL_FORMAT_TYPE_SIGNED
:
1241 switch (desc
->channel
[i
].size
) {
/* NOTE(review): presumably `case 8:` (missing) starts the 8-bit table. */
1243 switch (desc
->nr_channels
) {
/* NOTE(review): FMT_8 / FMT_8_8 assignments (original lines 1244-1250)
 * are missing; only the 3/4-channel tail survives.  The commented-out
 * FMT_8_8_8 line documents a deliberate fallthrough: 3-channel 8-bit
 * falls through to the 4-channel format because FMT_8_8_8 failed the
 * piglit draw-vertices test. */
1251 // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
1254 *format
= FMT_8_8_8_8
;
/* NOTE(review): presumably `case 16:` (missing) starts the 16-bit table. */
1259 switch (desc
->nr_channels
) {
1264 *format
= FMT_16_16
;
/* Same piglit-driven fallthrough as the 8-bit case: 3-channel 16-bit
 * uses the 4-channel format. */
1267 // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
1270 *format
= FMT_16_16_16_16
;
/* NOTE(review): presumably `case 32:` (missing) starts the 32-bit table. */
1275 switch (desc
->nr_channels
) {
1280 *format
= FMT_32_32
;
1283 *format
= FMT_32_32_32
;
1286 *format
= FMT_32_32_32_32
;
/* Post-table fixups: flag signed components, then choose the numeric
 * format from the channel's `normalized` bit.  The assignment bodies
 * (original lines 1299-1307) are missing. */
1298 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_SIGNED
) {
1301 if (desc
->channel
[i
].normalized
) {
/* Error path for formats not covered above. */
1308 R600_ERR("unsupported vertex format %s\n", util_format_name(pformat
));
/*
 * NOTE(review): extraction-damaged block -- original lines 1312-1327 are
 * missing between the signature and the first fprintf, including the
 * declarations of `i` and `chip` and (presumably) the mapping of `chiprev`
 * to a printable chip letter -- TODO confirm against the upstream Mesa
 * source before editing.
 *
 * Apparent purpose (grounded in the visible tokens): debug helper that
 * dumps `ndw` dwords of shader bytecode to stderr, one "0x%08X" word per
 * line, framed by header/footer separator lines.  The header also prints a
 * single character `chip`, whose derivation from `chiprev` is in the
 * missing lines.
 */
1311 static void r600_bc(unsigned ndw
, unsigned chiprev
, u32
*bytecode
)
1328 fprintf(stderr
, "bytecode %d dw -----------------------\n", ndw
);
1329 fprintf(stderr
, " %c\n", chip
);
1330 for (i
= 0; i
< ndw
; i
++) {
1331 fprintf(stderr
, "0x%08X\n", bytecode
[i
]);
1333 fprintf(stderr
, "--------------------------------------\n");
1336 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context
*rctx
, struct r600_vertex_element
*ve
)
1340 unsigned fetch_resource_start
= 0, format
, num_format
, format_comp
;
1341 struct pipe_vertex_element
*elements
= ve
->elements
;
1342 const struct util_format_description
*desc
;
1344 /* 2 dwords for cf aligned to 4 + 4 dwords per input */
1345 ndw
= 8 + ve
->count
* 4;
1346 ve
->fs_size
= ndw
* 4;
1348 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
1349 ve
->fetch_shader
= r600_bo(rctx
->radeon
, ndw
*4, 256, PIPE_BIND_VERTEX_BUFFER
, 0);
1350 if (ve
->fetch_shader
== NULL
) {
1354 bytecode
= r600_bo_map(rctx
->radeon
, ve
->fetch_shader
, 0, NULL
);
1355 if (bytecode
== NULL
) {
1356 r600_bo_reference(rctx
->radeon
, &ve
->fetch_shader
, NULL
);
1360 if (rctx
->family
>= CHIP_CEDAR
) {
1361 eg_cf_vtx(ve
, &bytecode
[0], (ndw
- 8) / 4);
1363 r600_cf_vtx(ve
, &bytecode
[0], (ndw
- 8) / 4);
1364 fetch_resource_start
= 160;
1367 /* vertex elements offset need special handling, if offset is bigger
1368 * than what we can put in fetch instruction then we need to alterate
1369 * the vertex resource offset. In such case in order to simplify code
1370 * we will bound one resource per elements. It's a worst case scenario.
1372 for (i
= 0; i
< ve
->count
; i
++) {
1373 ve
->vbuffer_offset
[i
] = C_SQ_VTX_WORD2_OFFSET
& elements
[i
].src_offset
;
1374 if (ve
->vbuffer_offset
[i
]) {
1375 ve
->vbuffer_need_offset
= 1;
1379 for (i
= 0; i
< ve
->count
; i
++) {
1380 unsigned vbuffer_index
;
1381 r600_vertex_data_type(ve
->hw_format
[i
], &format
, &num_format
, &format_comp
);
1382 desc
= util_format_description(ve
->hw_format
[i
]);
1384 R600_ERR("unknown format %d\n", ve
->hw_format
[i
]);
1385 r600_bo_reference(rctx
->radeon
, &ve
->fetch_shader
, NULL
);
1389 /* see above for vbuffer_need_offset explanation */
1390 vbuffer_index
= elements
[i
].vertex_buffer_index
;
1391 if (ve
->vbuffer_need_offset
) {
1392 bytecode
[8 + i
* 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i
+ fetch_resource_start
);
1394 bytecode
[8 + i
* 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index
+ fetch_resource_start
);
1396 bytecode
[8 + i
* 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
1397 S_SQ_VTX_WORD0_SRC_SEL_X(0) |
1398 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
1399 bytecode
[8 + i
* 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc
->swizzle
[0]) |
1400 S_SQ_VTX_WORD1_DST_SEL_Y(desc
->swizzle
[1]) |
1401 S_SQ_VTX_WORD1_DST_SEL_Z(desc
->swizzle
[2]) |
1402 S_SQ_VTX_WORD1_DST_SEL_W(desc
->swizzle
[3]) |
1403 S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
1404 S_SQ_VTX_WORD1_DATA_FORMAT(format
) |
1405 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format
) |
1406 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp
) |
1407 S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
1408 S_SQ_VTX_WORD1_GPR_DST_GPR(i
+ 1);
1409 bytecode
[8 + i
* 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements
[i
].src_offset
) |
1410 S_SQ_VTX_WORD2_MEGA_FETCH(1);
1411 bytecode
[8 + i
* 4 + 3] = 0;
1413 r600_bo_unmap(rctx
->radeon
, ve
->fetch_shader
);