2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "util/u_memory.h"
26 #include "r600_pipe.h"
28 #include "r600_opcodes.h"
/*
 * Report how many source operands the ALU instruction alu->inst reads.
 *
 * Only the case labels and the error message survive in this listing; the
 * switch header and the return statements are on lines that are not shown.
 * NOTE(review): the embedded line numbers skip after NOP, after CUBE and
 * after COS, which is presumably where the per-group return values sit -
 * confirm the actual counts against the complete file.
 */
31 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu
*alu
)
37 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP
:
39 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD
:
40 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE
:
41 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT
:
42 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE
:
43 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE
:
44 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL
:
45 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX
:
46 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN
:
47 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE
:
48 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE
:
49 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT
:
50 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE
:
51 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE
:
52 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT
:
53 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE
:
54 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE
:
55 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4
:
56 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE
:
57 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE
:
60 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV
:
61 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR
:
62 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT
:
63 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR
:
64 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC
:
65 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE
:
66 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED
:
67 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE
:
68 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE
:
69 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE
:
70 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT
:
71 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN
:
72 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS
:
/* Any opcode not listed above is reported with this message. */
75 "Need instruction operand number for 0x%x.\n", alu
->inst
);
/*
 * Forward declaration of the r700 ALU encoder. It is called from
 * r600_bc_build() and is not defined in the visible part of this file.
 */
81 int r700_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
);
/*
 * Allocate a control-flow (CF) node with CALLOC_STRUCT and initialise its
 * four list heads: the node's own link plus the alu, vtx and tex lists.
 * NOTE(review): the allocation-failure check and the return statement are
 * on lines not shown in this listing.
 */
83 static struct r600_bc_cf
*r600_bc_cf(void)
85 struct r600_bc_cf
*cf
= CALLOC_STRUCT(r600_bc_cf
);
89 LIST_INITHEAD(&cf
->list
);
90 LIST_INITHEAD(&cf
->alu
);
91 LIST_INITHEAD(&cf
->vtx
);
92 LIST_INITHEAD(&cf
->tex
);
/*
 * Allocate an ALU instruction node with CALLOC_STRUCT and initialise both of
 * its list links: "list" (per-CF instruction list) and "bs_list" (the list
 * walked by the bank-swizzle checks).
 * NOTE(review): the NULL check and return are on lines not shown.
 */
96 static struct r600_bc_alu
*r600_bc_alu(void)
98 struct r600_bc_alu
*alu
= CALLOC_STRUCT(r600_bc_alu
);
102 LIST_INITHEAD(&alu
->list
);
103 LIST_INITHEAD(&alu
->bs_list
);
/*
 * Allocate a vertex-fetch node with CALLOC_STRUCT and initialise its list
 * link. NOTE(review): the NULL check and return are on lines not shown.
 */
107 static struct r600_bc_vtx
*r600_bc_vtx(void)
109 struct r600_bc_vtx
*vtx
= CALLOC_STRUCT(r600_bc_vtx
);
113 LIST_INITHEAD(&vtx
->list
);
/*
 * Allocate a texture-fetch node with CALLOC_STRUCT and initialise its list
 * link. NOTE(review): the NULL check and return are on lines not shown.
 */
117 static struct r600_bc_tex
*r600_bc_tex(void)
119 struct r600_bc_tex
*tex
= CALLOC_STRUCT(r600_bc_tex
);
123 LIST_INITHEAD(&tex
->list
);
/*
 * Prepare a bytecode container for use: reset the CF list head and dispatch
 * on bc->family. A family that matches no case is reported as unknown.
 * NOTE(review): the case labels, what each case assigns, and where the
 * "family" argument is stored are on lines not shown in this listing.
 */
127 int r600_bc_init(struct r600_bc
*bc
, enum radeon_family family
)
129 LIST_INITHEAD(&bc
->cf
);
131 switch (bc
->family
) {
156 R600_ERR("unknown family %d\n", bc
->family
);
/*
 * Allocate a new CF node and append it to bc->cf. Its id is the previous
 * last CF's id plus 2, and the force_add_cf request is cleared.
 * NOTE(review): the allocation check, the guard around the bc->cf_last
 * dereference, and the update of bc->cf_last itself are presumably on lines
 * not shown - confirm against the complete file.
 */
162 static int r600_bc_add_cf(struct r600_bc
*bc
)
164 struct r600_bc_cf
*cf
= r600_bc_cf();
168 LIST_ADDTAIL(&cf
->list
, &bc
->cf
);
170 cf
->id
= bc
->cf_last
->id
+ 2;
174 bc
->force_add_cf
= 0;
/*
 * Append a CF node for an export: a fresh CF is created with
 * r600_bc_add_cf(), its instruction is taken from output->inst and the whole
 * output descriptor is copied by value into the CF node.
 * NOTE(review): the handling of r600_bc_add_cf()'s result is not shown.
 */
178 int r600_bc_add_output(struct r600_bc
*bc
, const struct r600_bc_output
*output
)
182 r
= r600_bc_add_cf(bc
);
185 bc
->cf_last
->inst
= output
->inst
;
186 memcpy(&bc
->cf_last
->output
, output
, sizeof(struct r600_bc_output
));
/*
 * Bank swizzle lookup tables, one for vector slots and one for the scalar
 * case. Both are indexed by the 3-bit key built in check_vector() and
 * check_scalar(): bit 2 is set when src[0] is a constant, bit 1 for src[1],
 * bit 0 for src[2]. The trailing comments on each entry spell out that key
 * in binary.
 */
190 const unsigned bank_swizzle_vec
[8] = {SQ_ALU_VEC_210
, //000
191 SQ_ALU_VEC_120
, //001
192 SQ_ALU_VEC_102
, //010
194 SQ_ALU_VEC_201
, //011
195 SQ_ALU_VEC_012
, //100
196 SQ_ALU_VEC_021
, //101
198 SQ_ALU_VEC_012
, //110
199 SQ_ALU_VEC_012
}; //111
201 const unsigned bank_swizzle_scl
[8] = {SQ_ALU_SCL_210
, //000
202 SQ_ALU_SCL_122
, //001
203 SQ_ALU_SCL_122
, //010
205 SQ_ALU_SCL_221
, //011
206 SQ_ALU_SCL_212
, //100
207 SQ_ALU_SCL_122
, //101
209 SQ_ALU_SCL_122
, //110
210 SQ_ALU_SCL_122
}; //111
/*
 * Mark every entry of alu->hw_gpr[cycle][component] as unclaimed by storing
 * -1, for all NUM_OF_CYCLES x NUM_OF_COMPONENTS slots.
 * The return value is on a line not shown in this listing.
 */
212 static int init_gpr(struct r600_bc_alu
*alu
)
214 int cycle
, component
;
216 for (cycle
= 0; cycle
< NUM_OF_CYCLES
; cycle
++)
217 for (component
= 0; component
< NUM_OF_COMPONENTS
; component
++)
218 alu
->hw_gpr
[cycle
][component
] = -1;
/*
 * Claim the hw_gpr slot for (cycle, chan) on behalf of GPR "sel".
 * A free slot (negative value) is taken; a slot already holding a different
 * selector is reported as a read-port conflict. A slot already holding the
 * same selector matches neither branch.
 * The return statements are on lines not shown in this listing.
 */
223 static int reserve_gpr(struct r600_bc_alu
*alu
, unsigned sel
, unsigned chan
, unsigned cycle
)
225 if (alu
->hw_gpr
[cycle
][chan
] < 0)
226 alu
->hw_gpr
[cycle
][chan
] = sel
;
227 else if (alu
->hw_gpr
[cycle
][chan
] != (int)sel
) {
228 R600_ERR("Another scalar operation has already used GPR read port for channel\n");
/*
 * Translate a scalar bank swizzle plus an operand index into a read cycle.
 * Each visible branch fills a three-entry table and stores table[sel] into
 * *p_cycle; an unrecognised swizzle is reported as an error.
 * NOTE(review): the switch header, the case labels selecting each table and
 * the declaration of "table" are on lines not shown. "sel" indexes a
 * three-entry table, so it presumably must be 0..2 - confirm.
 */
234 static int cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, unsigned *p_cycle
)
240 table
[0] = 2; table
[1] = 1; table
[2] = 0;
241 *p_cycle
= table
[sel
];
244 table
[0] = 1; table
[1] = 2; table
[2] = 2;
245 *p_cycle
= table
[sel
];
248 table
[0] = 2; table
[1] = 1; table
[2] = 2;
249 *p_cycle
= table
[sel
];
252 table
[0] = 2; table
[1] = 2; table
[2] = 1;
253 *p_cycle
= table
[sel
];
257 R600_ERR("bad scalar bank swizzle value\n");
/*
 * Translate a vector bank swizzle plus an operand index into a read cycle.
 * Six tables are visible, each a permutation of {0, 1, 2}; the selected
 * table's entry for "sel" is stored into *p_cycle. An unrecognised swizzle
 * is reported as an error.
 * NOTE(review): the switch header, the case labels selecting each table and
 * the declaration of "table" are on lines not shown. "sel" presumably must
 * be 0..2 - confirm.
 */
264 static int cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, unsigned *p_cycle
)
271 table
[0] = 0; table
[1] = 1; table
[2] = 2;
272 *p_cycle
= table
[sel
];
275 table
[0] = 0; table
[1] = 2; table
[2] = 1;
276 *p_cycle
= table
[sel
];
279 table
[0] = 1; table
[1] = 2; table
[2] = 0;
280 *p_cycle
= table
[sel
];
283 table
[0] = 1; table
[1] = 0; table
[2] = 2;
284 *p_cycle
= table
[sel
];
287 table
[0] = 2; table
[1] = 0; table
[2] = 1;
288 *p_cycle
= table
[sel
];
291 table
[0] = 2; table
[1] = 1; table
[2] = 0;
292 *p_cycle
= table
[sel
];
295 R600_ERR("bad vector bank swizzle value\n");
/*
 * Add this instruction's source reads to a per-channel tally.
 * For each of the instruction's operands (count taken from
 * r600_bc_get_num_operands()), chan_counter[chan] is incremented when the
 * source selector lies strictly between 0 and 128 and the channel is 0..3.
 * NOTE(review): "sel > 0" leaves selector 0 uncounted; if selectors below
 * 128 are GPR indices, reads of GPR 0 are skipped - confirm whether that is
 * intended.
 */
304 static void update_chan_counter(struct r600_bc_alu
*alu
, int *chan_counter
)
310 num_src
= r600_bc_get_num_operands(alu
);
312 for (i
= 0; i
< num_src
; i
++) {
313 channel_swizzle
= alu
->src
[i
].chan
;
314 if ((alu
->src
[i
].sel
> 0 && alu
->src
[i
].sel
< 128) && channel_swizzle
<= 3)
315 chan_counter
[channel_swizzle
]++;
319 /* we need something like this I think - but this is bogus */
/*
 * Tally per-channel source reads over alu_first and every instruction on its
 * bs_list, and log an error when any of the four channels is read more than
 * three times. "bc" is not used in the visible lines. The only call visible
 * in this listing is commented out, in check_and_set_bank_swizzle().
 * The return statements are on lines not shown.
 */
320 int check_read_slots(struct r600_bc
*bc
, struct r600_bc_alu
*alu_first
)
322 struct r600_bc_alu
*alu
;
323 int chan_counter
[4] = { 0 };
325 update_chan_counter(alu_first
, chan_counter
);
327 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
328 update_chan_counter(alu
, chan_counter
);
331 if (chan_counter
[0] > 3 ||
332 chan_counter
[1] > 3 ||
333 chan_counter
[2] > 3 ||
334 chan_counter
[3] > 3) {
335 R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
336 alu_first
->inst
, chan_counter
[0], chan_counter
[1], chan_counter
[2], chan_counter
[3]);
/*
 * Classify a source selector. Two ranges are tested: 256..511, and
 * V_SQ_ALU_SRC_0 .. V_SQ_ALU_SRC_LITERAL inclusive.
 * NOTE(review): the return statements are not shown; callers use the result
 * as a boolean, so presumably both ranges yield non-zero and everything
 * else yields 0 - confirm.
 */
343 static int is_const(int sel
)
345 if (sel
> 255 && sel
< 512)
347 if (sel
>= V_SQ_ALU_SRC_0
&& sel
<= V_SQ_ALU_SRC_LITERAL
)
/*
 * Choose the bank swizzle for a lone (scalar) ALU instruction.
 * A non-zero bank_swizzle_force is copied into bank_swizzle. Otherwise a
 * 3-bit key is built from which of src[0..2] are constants (weights 4, 2, 1)
 * and used to index bank_swizzle_scl.
 * NOTE(review): whether the forced path returns before the table lookup is
 * on a line not shown. All three sources are inspected regardless of the
 * instruction's operand count.
 */
352 static int check_scalar(struct r600_bc
*bc
, struct r600_bc_alu
*alu
)
354 unsigned swizzle_key
;
356 if (alu
->bank_swizzle_force
) {
357 alu
->bank_swizzle
= alu
->bank_swizzle_force
;
360 swizzle_key
= (is_const(alu
->src
[0].sel
) ? 4 : 0 ) +
361 (is_const(alu
->src
[1].sel
) ? 2 : 0 ) +
362 (is_const(alu
->src
[2].sel
) ? 1 : 0 );
364 alu
->bank_swizzle
= bank_swizzle_scl
[swizzle_key
];
/*
 * Choose the bank swizzle for an ALU instruction that is part of a group.
 * Same logic as check_scalar(), except the key indexes bank_swizzle_vec:
 * a non-zero bank_swizzle_force wins, otherwise the key is built from which
 * of src[0..2] are constants (weights 4, 2, 1).
 * NOTE(review): whether the forced path returns before the table lookup is
 * on a line not shown.
 */
368 static int check_vector(struct r600_bc
*bc
, struct r600_bc_alu
*alu
)
370 unsigned swizzle_key
;
372 if (alu
->bank_swizzle_force
) {
373 alu
->bank_swizzle
= alu
->bank_swizzle_force
;
376 swizzle_key
= (is_const(alu
->src
[0].sel
) ? 4 : 0 ) +
377 (is_const(alu
->src
[1].sel
) ? 2 : 0 ) +
378 (is_const(alu
->src
[2].sel
) ? 1 : 0 );
380 alu
->bank_swizzle
= bank_swizzle_vec
[swizzle_key
];
/*
 * Assign bank swizzles to the instruction group headed by alu_first.
 * The bs_list is walked once (the loop body is not shown; presumably it
 * counts entries into num_instr). When num_instr is 1 the head is handled by
 * check_scalar(); the remaining visible code runs check_vector() on the head
 * and on every entry of its bs_list.
 * NOTE(review): the initial value of num_instr and the else keyword joining
 * the two paths are on lines not shown.
 */
384 static int check_and_set_bank_swizzle(struct r600_bc
*bc
, struct r600_bc_alu
*alu_first
)
386 struct r600_bc_alu
*alu
= NULL
;
391 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
395 if (num_instr
== 1) {
396 check_scalar(bc
, alu_first
);
399 /* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
400 check_vector(bc
, alu_first
);
401 LIST_FOR_EACH_ENTRY(alu
, &alu_first
->bs_list
, bs_list
) {
402 check_vector(bc
, alu
);
/*
 * Append a copy of *alu to the current ALU clause, opening a new CF of the
 * requested type when needed.
 *
 * Visible steps, in order:
 *  - allocate a node and copy *alu into it;
 *  - start a new CF when there is none or the last CF's inst differs from
 *    (type << 3) (a further condition is on a line not shown), then stamp
 *    the CF with (type << 3);
 *  - record the node as the head of the current bank-swizzle group when no
 *    head is set, and link it onto that head's bs_list;
 *  - request a new CF for the next call once a group ends with the clause
 *    holding 120 or more instruction slots (ndw >> 1);
 *  - raise bc->ngpr to cover every source selector below 128 and the
 *    destination selector;
 *  - for sources with selector 253, grow nliteral to (chan + 2) & 0x6, and
 *    inherit a larger nliteral from the previous ALU when that one did not
 *    end its group;
 *  - append the node to the CF's alu list, add 2 dwords, set kcache0_mode;
 *  - run check_and_set_bank_swizzle() on the group and clear curr_bs_head
 *    (the condition guarding this last step is on a line not shown).
 *
 * NOTE(review): two block comments in the body lost their closing line in
 * this listing; error handling and the return value are also not shown.
 */
408 int r600_bc_add_alu_type(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
, int type
)
410 struct r600_bc_alu
*nalu
= r600_bc_alu();
411 struct r600_bc_alu
*lalu
;
416 memcpy(nalu
, alu
, sizeof(struct r600_bc_alu
));
419 /* cf can contains only alu or only vtx or only tex */
420 if (bc
->cf_last
== NULL
|| bc
->cf_last
->inst
!= (type
<< 3) ||
422 r
= r600_bc_add_cf(bc
);
427 bc
->cf_last
->inst
= (type
<< 3);
429 if (!bc
->cf_last
->curr_bs_head
) {
430 bc
->cf_last
->curr_bs_head
= nalu
;
431 LIST_INITHEAD(&nalu
->bs_list
);
433 LIST_ADDTAIL(&nalu
->bs_list
, &bc
->cf_last
->curr_bs_head
->bs_list
);
435 /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
437 if (alu
->last
&& (bc
->cf_last
->ndw
>> 1) >= 120) {
438 bc
->force_add_cf
= 1;
440 /* number of gpr == the last gpr used in any alu */
441 for (i
= 0; i
< 3; i
++) {
442 if (alu
->src
[i
].sel
>= bc
->ngpr
&& alu
->src
[i
].sel
< 128) {
443 bc
->ngpr
= alu
->src
[i
].sel
+ 1;
445 /* compute how many literal are needed
446 * either 2 or 4 literals
448 if (alu
->src
[i
].sel
== 253) {
449 if (((alu
->src
[i
].chan
+ 2) & 0x6) > nalu
->nliteral
) {
450 nalu
->nliteral
= (alu
->src
[i
].chan
+ 2) & 0x6;
454 if (!LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
455 lalu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
456 if (!lalu
->last
&& lalu
->nliteral
> nalu
->nliteral
) {
457 nalu
->nliteral
= lalu
->nliteral
;
460 if (alu
->dst
.sel
>= bc
->ngpr
) {
461 bc
->ngpr
= alu
->dst
.sel
+ 1;
463 LIST_ADDTAIL(&nalu
->list
, &bc
->cf_last
->alu
);
464 /* each alu use 2 dwords */
465 bc
->cf_last
->ndw
+= 2;
468 bc
->cf_last
->kcache0_mode
= 2;
470 /* process cur ALU instructions for bank swizzle */
472 check_and_set_bank_swizzle(bc
, bc
->cf_last
->curr_bs_head
);
473 bc
->cf_last
->curr_bs_head
= NULL
;
/*
 * Convenience wrapper around r600_bc_add_alu_type() that uses the plain ALU
 * clause instruction, passed through BC_INST() for this bytecode object.
 */
478 int r600_bc_add_alu(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
)
480 return r600_bc_add_alu_type(bc
, alu
, BC_INST(bc
, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
));
/*
 * Attach literal constants to the last ALU instruction of the current
 * clause.
 *
 * Visible steps: several early tests on bc->cf_last (NULL, TEX clause,
 * flow-control instructions) whose outcomes are on lines not shown; an
 * error when the last CF is not an ALU / ALU_PUSH_BEFORE clause or holds no
 * ALU; then, for the last ALU on the list, a test on last / nliteral /
 * literal_added (outcome not shown), a fixed 16-byte copy of value[] into
 * alu->value, and nliteral added to both the clause and program dword
 * counts before literal_added is set.
 * NOTE(review): the copy is always 4 dwords, so "value" presumably must
 * point at at least four u32 - confirm with callers.
 */
483 int r600_bc_add_literal(struct r600_bc
*bc
, const u32
*value
)
485 struct r600_bc_alu
*alu
;
487 if (bc
->cf_last
== NULL
) {
490 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_TEX
) {
494 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_JUMP
||
495 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_ELSE
||
496 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
||
497 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
||
498 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
||
499 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
||
500 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_POP
) {
504 if (((bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3)) &&
505 (bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3))) ||
506 LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
507 R600_ERR("last CF is not ALU (%p)\n", bc
->cf_last
);
/* The literals belong to the most recently added ALU of the clause. */
510 alu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
511 if (!alu
->last
|| !alu
->nliteral
|| alu
->literal_added
) {
514 memcpy(alu
->value
, value
, 4 * 4);
515 bc
->cf_last
->ndw
+= alu
->nliteral
;
516 bc
->ndw
+= alu
->nliteral
;
517 alu
->literal_added
= 1;
/*
 * Append a copy of *vtx to the current vertex-fetch clause, opening a new
 * CF when there is none or the last one is neither VTX nor VTX_TC (a
 * further condition is on a line not shown). A newly opened CF is stamped
 * as a VTX clause. Each fetch adds 4 dwords to the clause.
 *
 * Fix: the clause-size check now tests the current clause's dword count
 * (bc->cf_last->ndw) rather than the program-wide bc->ndw. r600_bc_build()
 * sets bc->ndw to the end of the whole program and r600_bc_cf_build()
 * encodes the per-clause COUNT from cf->ndw / 4, so the limit of eight
 * fetches applies per clause. With bc->ndw, every fetch forced a new CF
 * once the program exceeded 31 dwords.
 *
 * NOTE(review): allocation/error handling and the return value are on
 * lines not shown in this listing.
 */
521 int r600_bc_add_vtx(struct r600_bc
*bc
, const struct r600_bc_vtx
*vtx
)
523 struct r600_bc_vtx
*nvtx
= r600_bc_vtx();
528 memcpy(nvtx
, vtx
, sizeof(struct r600_bc_vtx
));
530 /* cf can contains only alu or only vtx or only tex */
531 if (bc
->cf_last
== NULL
||
532 (bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX
&&
533 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
) ||
535 r
= r600_bc_add_cf(bc
);
540 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_VTX
;
542 LIST_ADDTAIL(&nvtx
->list
, &bc
->cf_last
->vtx
);
543 /* each fetch use 4 dwords */
544 bc
->cf_last
->ndw
+= 4;
/* once this clause holds eight fetches, make the next one start a new CF */
546 if ((bc
->cf_last
->ndw
/ 4) > 7)
547 bc
->force_add_cf
= 1;
/*
 * Append a copy of *tex to the current texture-fetch clause, opening a new
 * CF when there is none or the last one is not a TEX clause (a further
 * condition is on a line not shown). A newly opened CF is stamped as a TEX
 * clause. Each fetch adds 4 dwords to the clause.
 *
 * Fix: the clause-size check now tests the current clause's dword count
 * (bc->cf_last->ndw) rather than the program-wide bc->ndw. r600_bc_build()
 * sets bc->ndw to the end of the whole program and r600_bc_cf_build()
 * encodes the per-clause COUNT from cf->ndw / 4, so the limit of eight
 * fetches applies per clause. With bc->ndw, every fetch forced a new CF
 * once the program exceeded 31 dwords.
 *
 * NOTE(review): allocation/error handling and the return value are on
 * lines not shown in this listing.
 */
551 int r600_bc_add_tex(struct r600_bc
*bc
, const struct r600_bc_tex
*tex
)
553 struct r600_bc_tex
*ntex
= r600_bc_tex();
558 memcpy(ntex
, tex
, sizeof(struct r600_bc_tex
));
560 /* cf can contains only alu or only vtx or only tex */
561 if (bc
->cf_last
== NULL
||
562 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_TEX
||
564 r
= r600_bc_add_cf(bc
);
569 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_TEX
;
571 LIST_ADDTAIL(&ntex
->list
, &bc
->cf_last
->tex
);
572 /* each texture fetch use 4 dwords */
573 bc
->cf_last
->ndw
+= 4;
/* once this clause holds eight fetches, make the next one start a new CF */
575 if ((bc
->cf_last
->ndw
/ 4) > 7)
576 bc
->force_add_cf
= 1;
/*
 * Append a bare control-flow instruction: a new CF is created, its
 * condition is set to V_SQ_CF_COND_ACTIVE and its instruction to "inst".
 * NOTE(review): the handling of r600_bc_add_cf()'s result and the return
 * value are on lines not shown.
 */
580 int r600_bc_add_cfinst(struct r600_bc
*bc
, int inst
)
583 r
= r600_bc_add_cf(bc
);
587 bc
->cf_last
->cond
= V_SQ_CF_COND_ACTIVE
;
588 bc
->cf_last
->inst
= inst
;
592 /* common to all 3 families */
/*
 * Encode one vertex fetch into four consecutive dwords of bc->bytecode,
 * starting at index id: WORD0 (buffer, source GPR and select, mega fetch
 * count), WORD1 (destination selects, format fields, destination GPR),
 * WORD2 with MEGA_FETCH fixed to 1, and a zero padding dword.
 * The return value is on a line not shown.
 */
593 static int r600_bc_vtx_build(struct r600_bc
*bc
, struct r600_bc_vtx
*vtx
, unsigned id
)
595 bc
->bytecode
[id
++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx
->buffer_id
) |
596 S_SQ_VTX_WORD0_SRC_GPR(vtx
->src_gpr
) |
597 S_SQ_VTX_WORD0_SRC_SEL_X(vtx
->src_sel_x
) |
598 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx
->mega_fetch_count
);
599 bc
->bytecode
[id
++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx
->dst_sel_x
) |
600 S_SQ_VTX_WORD1_DST_SEL_Y(vtx
->dst_sel_y
) |
601 S_SQ_VTX_WORD1_DST_SEL_Z(vtx
->dst_sel_z
) |
602 S_SQ_VTX_WORD1_DST_SEL_W(vtx
->dst_sel_w
) |
603 S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx
->use_const_fields
) |
604 S_SQ_VTX_WORD1_DATA_FORMAT(vtx
->data_format
) |
605 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx
->num_format_all
) |
606 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx
->format_comp_all
) |
607 S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx
->srf_mode_all
) |
608 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx
->dst_gpr
);
609 bc
->bytecode
[id
++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
610 bc
->bytecode
[id
++] = 0;
614 /* common to all 3 families */
/*
 * Encode one texture fetch into four consecutive dwords of bc->bytecode,
 * starting at index id: WORD0 (instruction, resource, source GPR/rel),
 * WORD1 (destination GPR/rel and selects, LOD bias, coordinate types),
 * WORD2 (offsets, sampler, source selects), and a zero padding dword.
 * The return value is on a line not shown.
 */
615 static int r600_bc_tex_build(struct r600_bc
*bc
, struct r600_bc_tex
*tex
, unsigned id
)
617 bc
->bytecode
[id
++] = S_SQ_TEX_WORD0_TEX_INST(tex
->inst
) |
618 S_SQ_TEX_WORD0_RESOURCE_ID(tex
->resource_id
) |
619 S_SQ_TEX_WORD0_SRC_GPR(tex
->src_gpr
) |
620 S_SQ_TEX_WORD0_SRC_REL(tex
->src_rel
);
621 bc
->bytecode
[id
++] = S_SQ_TEX_WORD1_DST_GPR(tex
->dst_gpr
) |
622 S_SQ_TEX_WORD1_DST_REL(tex
->dst_rel
) |
623 S_SQ_TEX_WORD1_DST_SEL_X(tex
->dst_sel_x
) |
624 S_SQ_TEX_WORD1_DST_SEL_Y(tex
->dst_sel_y
) |
625 S_SQ_TEX_WORD1_DST_SEL_Z(tex
->dst_sel_z
) |
626 S_SQ_TEX_WORD1_DST_SEL_W(tex
->dst_sel_w
) |
627 S_SQ_TEX_WORD1_LOD_BIAS(tex
->lod_bias
) |
628 S_SQ_TEX_WORD1_COORD_TYPE_X(tex
->coord_type_x
) |
629 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex
->coord_type_y
) |
630 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex
->coord_type_z
) |
631 S_SQ_TEX_WORD1_COORD_TYPE_W(tex
->coord_type_w
);
632 bc
->bytecode
[id
++] = S_SQ_TEX_WORD2_OFFSET_X(tex
->offset_x
) |
633 S_SQ_TEX_WORD2_OFFSET_Y(tex
->offset_y
) |
634 S_SQ_TEX_WORD2_OFFSET_Z(tex
->offset_z
) |
635 S_SQ_TEX_WORD2_SAMPLER_ID(tex
->sampler_id
) |
636 S_SQ_TEX_WORD2_SRC_SEL_X(tex
->src_sel_x
) |
637 S_SQ_TEX_WORD2_SRC_SEL_Y(tex
->src_sel_y
) |
638 S_SQ_TEX_WORD2_SRC_SEL_Z(tex
->src_sel_z
) |
639 S_SQ_TEX_WORD2_SRC_SEL_W(tex
->src_sel_w
);
640 bc
->bytecode
[id
++] = 0;
644 /* r600 only, r700/eg bits in r700_asm.c */
/*
 * Encode one ALU instruction into bc->bytecode starting at index id.
 * WORD0 carries src0/src1 (sel, rel, chan, neg) and the "last" flag.
 * Two WORD1 encodings follow in the listing: one using the OP3 fields
 * (third source operand) and one using the OP2 fields (abs modifiers, write
 * mask, predicate/execute-mask update).
 * NOTE(review): the condition choosing between the two WORD1 forms is on a
 * line not shown - presumably a three-operand flag on the instruction.
 * After the instruction words, alu->nliteral literal dwords are appended;
 * an instruction that needs literals but never received them is reported
 * as a bug.
 */
645 static int r600_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
)
649 /* don't replace gpr by pv or ps for destination register */
650 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
651 S_SQ_ALU_WORD0_SRC0_REL(alu
->src
[0].rel
) |
652 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
653 S_SQ_ALU_WORD0_SRC0_NEG(alu
->src
[0].neg
) |
654 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
655 S_SQ_ALU_WORD0_SRC1_REL(alu
->src
[1].rel
) |
656 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
657 S_SQ_ALU_WORD0_SRC1_NEG(alu
->src
[1].neg
) |
658 S_SQ_ALU_WORD0_LAST(alu
->last
);
/* WORD1, three-operand (OP3) layout */
661 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
662 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
663 S_SQ_ALU_WORD1_DST_REL(alu
->dst
.rel
) |
664 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
665 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu
->src
[2].sel
) |
666 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu
->src
[2].rel
) |
667 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu
->src
[2].chan
) |
668 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu
->src
[2].neg
) |
669 S_SQ_ALU_WORD1_OP3_ALU_INST(alu
->inst
) |
670 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu
->bank_swizzle
);
/* WORD1, two-operand (OP2) layout; alu->predicate drives both update bits */
672 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
673 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
674 S_SQ_ALU_WORD1_DST_REL(alu
->dst
.rel
) |
675 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
676 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu
->src
[0].abs
) |
677 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu
->src
[1].abs
) |
678 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu
->dst
.write
) |
679 S_SQ_ALU_WORD1_OP2_ALU_INST(alu
->inst
) |
680 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu
->bank_swizzle
) |
681 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu
->predicate
) |
682 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu
->predicate
);
685 if (alu
->nliteral
&& !alu
->literal_added
) {
686 R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu
->inst
);
/* literal dwords follow the instruction words directly */
688 for (i
= 0; i
< alu
->nliteral
; i
++) {
689 bc
->bytecode
[id
++] = alu
->value
[i
];
695 /* common for r600/r700 - eg in eg_asm.c */
/*
 * Encode one CF node into two dwords of bc->bytecode at index cf->id.
 * The layout depends on the CF instruction:
 *  - ALU / ALU_PUSH_BEFORE (stored shifted left by 3): clause address in
 *    64-bit units (addr >> 1), kcache settings, barrier, and COUNT taken
 *    from ndw / 2 - 1. USES_WATERFALL is only emitted when chiprev is 0.
 *  - TEX / VTX / VTX_TC: clause address, barrier, COUNT from ndw / 4 - 1.
 *  - EXPORT / EXPORT_DONE: fields of cf->output.
 *  - JUMP, ELSE, POP and the LOOP_* instructions: target cf_addr >> 1,
 *    barrier, condition and pop count.
 * Any other instruction is reported as unsupported.
 * NOTE(review): the switch header, break statements and return values are
 * on lines not shown.
 */
696 static int r600_bc_cf_build(struct r600_bc
*bc
, struct r600_bc_cf
*cf
)
698 unsigned id
= cf
->id
;
701 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
702 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
703 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD0_ADDR(cf
->addr
>> 1) |
704 S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf
->kcache0_mode
) |
705 S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf
->kcache0_bank
) |
706 S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf
->kcache1_bank
);
708 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD1_CF_INST(cf
->inst
>> 3) |
709 S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf
->kcache1_mode
) |
710 S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf
->kcache0_addr
) |
711 S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf
->kcache1_addr
) |
712 S_SQ_CF_ALU_WORD1_BARRIER(1) |
713 S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc
->chiprev
== 0 ? cf
->r6xx_uses_waterfall
: 0) |
714 S_SQ_CF_ALU_WORD1_COUNT((cf
->ndw
/ 2) - 1);
716 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
717 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
718 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
719 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->addr
>> 1);
720 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
721 S_SQ_CF_WORD1_BARRIER(1) |
722 S_SQ_CF_WORD1_COUNT((cf
->ndw
/ 4) - 1);
724 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
725 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
726 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf
->output
.gpr
) |
727 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf
->output
.elem_size
) |
728 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf
->output
.array_base
) |
729 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf
->output
.type
);
730 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf
->output
.swizzle_x
) |
731 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf
->output
.swizzle_y
) |
732 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf
->output
.swizzle_z
) |
733 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf
->output
.swizzle_w
) |
734 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf
->output
.barrier
) |
735 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf
->output
.inst
) |
736 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf
->output
.end_of_program
);
738 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
739 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
740 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
741 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
742 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
743 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
744 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
745 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->cf_addr
>> 1);
746 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
747 S_SQ_CF_WORD1_BARRIER(1) |
748 S_SQ_CF_WORD1_COND(cf
->cond
) |
749 S_SQ_CF_WORD1_POP_COUNT(cf
->pop_count
);
753 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
/*
 * Produce the final bytecode buffer for the whole program.
 *
 * Visible steps:
 *  - derive bc->nstack from callstack[0].max when that is positive;
 *  - first pass over the CF list: assign each clause its address, starting
 *    right after the CF instructions (last CF id + 2); fetch clauses are
 *    aligned with a 4-dword mask; bc->ndw is set from cf->addr + cf->ndw;
 *  - allocate bc->bytecode as bc->ndw zeroed dwords;
 *  - second pass: encode each CF (eg_bc_cf_build() when chiprev is 2,
 *    r600_bc_cf_build() otherwise) and then every ALU, VTX or TEX
 *    instruction it contains; ALU literals advance the address by nliteral.
 *
 * NOTE(review): bc->cf_last is dereferenced to compute the start address
 * and no NULL check is visible in this listing - confirm an empty program
 * cannot reach this point. The per-case address updates, error returns and
 * break statements are on lines not shown.
 */
759 int r600_bc_build(struct r600_bc
*bc
)
761 struct r600_bc_cf
*cf
;
762 struct r600_bc_alu
*alu
;
763 struct r600_bc_vtx
*vtx
;
764 struct r600_bc_tex
*tex
;
768 if (bc
->callstack
[0].max
> 0)
769 bc
->nstack
= ((bc
->callstack
[0].max
+ 3) >> 2) + 2;
771 /* first path compute addr of each CF block */
772 /* addr start after all the CF instructions */
773 addr
= bc
->cf_last
->id
+ 2;
774 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
776 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
777 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
779 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
780 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
781 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
782 /* fetch node need to be 16 bytes aligned*/
784 addr
&= 0xFFFFFFFCUL
;
786 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
787 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
788 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
789 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
791 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
792 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
793 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
794 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
795 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
796 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
797 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
800 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
805 bc
->ndw
= cf
->addr
+ cf
->ndw
;
/* bc->ndw counts dwords, hence the factor of 4 for the byte size */
808 bc
->bytecode
= calloc(1, bc
->ndw
* 4);
809 if (bc
->bytecode
== NULL
)
/* second pass: emit the CF words, then the clause contents */
811 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
813 if (bc
->chiprev
== 2)
814 r
= eg_bc_cf_build(bc
, cf
);
816 r
= r600_bc_cf_build(bc
, cf
);
820 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
821 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
822 LIST_FOR_EACH_ENTRY(alu
, &cf
->alu
, list
) {
823 switch(bc
->chiprev
) {
825 r
= r600_bc_alu_build(bc
, alu
, addr
);
828 case 2: /* eg alu is same encoding as r700 */
829 r
= r700_bc_alu_build(bc
, alu
, addr
);
832 R600_ERR("unknown family %d\n", bc
->family
);
839 addr
+= alu
->nliteral
;
843 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
844 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
845 LIST_FOR_EACH_ENTRY(vtx
, &cf
->vtx
, list
) {
846 r
= r600_bc_vtx_build(bc
, vtx
, addr
);
852 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
853 LIST_FOR_EACH_ENTRY(tex
, &cf
->tex
, list
) {
854 r
= r600_bc_tex_build(bc
, tex
, addr
);
860 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
861 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
862 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
863 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
864 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL
:
865 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END
:
866 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE
:
867 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK
:
868 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
869 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
870 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
873 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
/*
 * Tear down a bytecode container: walk every CF node and, inside it, the
 * alu, tex and vtx instruction lists (using the _SAFE iterators so entries
 * can be released while iterating), re-initialising each list head
 * afterwards.
 *
 * Fix: the vtx iterators were declared as struct r600_bc_tex pointers even
 * though cf->vtx holds struct r600_bc_vtx nodes (see r600_bc_add_vtx()).
 * The list macros locate the entry from the member offset of "list" in the
 * iterator's type, so the wrong type yields a wrong pointer whenever the
 * two structs place "list" at different offsets.
 *
 * NOTE(review): the statements that release each entry, and the brace that
 * would show whether the final LIST_INITHEAD(&cf->list) runs inside or
 * after the CF loop, are on lines not shown in this listing.
 */
880 void r600_bc_clear(struct r600_bc
*bc
)
882 struct r600_bc_cf
*cf
, *next_cf
;
887 LIST_FOR_EACH_ENTRY_SAFE(cf
, next_cf
, &bc
->cf
, list
) {
888 struct r600_bc_alu
*alu
, *next_alu
;
889 struct r600_bc_tex
*tex
, *next_tex
;
890 struct r600_bc_vtx
*vtx
, *next_vtx
;
892 LIST_FOR_EACH_ENTRY_SAFE(alu
, next_alu
, &cf
->alu
, list
) {
896 LIST_INITHEAD(&cf
->alu
);
898 LIST_FOR_EACH_ENTRY_SAFE(tex
, next_tex
, &cf
->tex
, list
) {
902 LIST_INITHEAD(&cf
->tex
);
904 LIST_FOR_EACH_ENTRY_SAFE(vtx
, next_vtx
, &cf
->vtx
, list
) {
908 LIST_INITHEAD(&cf
->vtx
);
913 LIST_INITHEAD(&cf
->list
);