2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "r600_context.h"
25 #include "util/u_memory.h"
/*
 * Forward declaration of the r700 variant of the ALU instruction encoder.
 * It takes the same arguments as r600_bc_alu_build(): the bytecode
 * container, the ALU instruction to encode and the dword index at which
 * to start writing. No definition is visible in this part of the file.
 */
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
32 static struct r600_bc_cf
*r600_bc_cf(void)
34 struct r600_bc_cf
*cf
= CALLOC_STRUCT(r600_bc_cf
);
38 LIST_INITHEAD(&cf
->list
);
39 LIST_INITHEAD(&cf
->alu
);
40 LIST_INITHEAD(&cf
->vtx
);
41 LIST_INITHEAD(&cf
->tex
);
45 static struct r600_bc_alu
*r600_bc_alu(void)
47 struct r600_bc_alu
*alu
= CALLOC_STRUCT(r600_bc_alu
);
51 LIST_INITHEAD(&alu
->list
);
55 static struct r600_bc_vtx
*r600_bc_vtx(void)
57 struct r600_bc_vtx
*vtx
= CALLOC_STRUCT(r600_bc_vtx
);
61 LIST_INITHEAD(&vtx
->list
);
65 static struct r600_bc_tex
*r600_bc_tex(void)
67 struct r600_bc_tex
*tex
= CALLOC_STRUCT(r600_bc_tex
);
71 LIST_INITHEAD(&tex
->list
);
/*
 * r600_bc_init - prepare a bytecode container for a given chip family.
 *
 * bc:     container to initialise
 * family: chip family the bytecode is being assembled for
 *
 * The statements visible here initialise the bc->cf list head and report
 * an "unknown family" error through R600_ERR(), printing bc->family.
 *
 * NOTE(review): the integers at the start of these lines look like line
 * numbers left over from an extraction step, and they jump from 77 to 97.
 * The braces, the code that stores and classifies the family, and every
 * return statement are therefore presumably missing from this copy.
 * Restore them from the upstream file before relying on this block.
 */
75 int r600_bc_init(struct r600_bc
*bc
, enum radeon_family family
)
/* Start with an empty list of CF nodes. */
77 LIST_INITHEAD(&bc
->cf
);
/* Diagnostic for a family value that the (missing) selection code rejects. */
97 R600_ERR("unknown family %d\n", bc
->family
);
/*
 * r600_bc_add_cf - append a fresh CF node to the container.
 *
 * bc: container that receives the new node
 *
 * Visible behaviour: a node is obtained from r600_bc_cf(), linked at the
 * tail of bc->cf, given an id two above the id of bc->cf_last, and
 * bc->force_add_cf is cleared.
 *
 * NOTE(review): the leading integers look like extraction residue and skip
 * 106-108, 110, 112-114 and 116 onwards. As shown, cf is used without a
 * NULL check, bc->cf_last is dereferenced although callers test it against
 * NULL, and nothing here makes the new node the current bc->cf_last even
 * though callers use bc->cf_last right after this call. Those statements
 * and the return value are presumably among the missing lines - confirm
 * against the upstream file.
 */
103 static int r600_bc_add_cf(struct r600_bc
*bc
)
105 struct r600_bc_cf
*cf
= r600_bc_cf();
/* Queue the node after every CF added so far. */
109 LIST_ADDTAIL(&cf
->list
, &bc
->cf
);
/* Ids advance in steps of two; r600_bc_cf_build() writes two dwords at cf->id. */
111 cf
->id
= bc
->cf_last
->id
+ 2;
/* A new CF has been started, so any pending request to force one is satisfied. */
115 bc
->force_add_cf
= 0;
119 int r600_bc_add_output(struct r600_bc
*bc
, const struct r600_bc_output
*output
)
123 r
= r600_bc_add_cf(bc
);
126 bc
->cf_last
->inst
= output
->inst
;
127 memcpy(&bc
->cf_last
->output
, output
, sizeof(struct r600_bc_output
));
/*
 * r600_bc_add_alu_type - append a copy of an ALU instruction to the
 * current CF, starting a new CF of the requested type when needed.
 *
 * bc:   container to extend
 * alu:  instruction to add; it is copied into a freshly allocated node
 * type: ALU CF instruction code; it is stored shifted left by three bits
 *       in cf_last->inst
 *
 * Visible behaviour:
 *  - a new CF is added when there is no current CF or when the current CF
 *    has a different inst value (a further operand of that condition is
 *    not visible);
 *  - bc->force_add_cf is set once an instruction flagged "last" is added
 *    while the current CF already holds 124 or more dword pairs;
 *  - bc->ngpr is raised to cover every source selector below 128 and the
 *    destination selector;
 *  - a source selector of 253 makes nliteral at least (chan + 2) & 0x6,
 *    and a larger nliteral is inherited from the previous instruction in
 *    the CF when that one is not flagged "last";
 *  - the node is linked at the tail of cf_last->alu and the CF grows by
 *    two dwords.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps, so declarations, braces, error paths and returns are presumably
 * missing. The comment that starts with "compute how many literal are
 * needed" has no visible terminator, so everything after it up to the end
 * of the "each alu use 2 dwords" comment is lexically part of a comment in
 * this copy. Restore the block from the upstream file before building it.
 */
131 int r600_bc_add_alu_type(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
, int type
)
133 struct r600_bc_alu
*nalu
= r600_bc_alu();
134 struct r600_bc_alu
*lalu
;
/* Work on a private copy so the caller's structure is never linked in. */
139 memcpy(nalu
, alu
, sizeof(struct r600_bc_alu
));
142 /* a CF can contain only ALU, only VTX or only TEX instructions */
143 if (bc
->cf_last
== NULL
|| bc
->cf_last
->inst
!= (type
<< 3) ||
145 /* at most 128 slots; adding one ALU group can take 4 slots + 4 constants in the worst case */
146 r
= r600_bc_add_cf(bc
);
151 bc
->cf_last
->inst
= (type
<< 3);
/* ndw >> 1 is the number of dword pairs already in this CF. */
153 if (alu
->last
&& (bc
->cf_last
->ndw
>> 1) >= 124) {
154 bc
->force_add_cf
= 1;
156 /* number of GPRs == one more than the highest GPR used by any ALU instruction */
157 for (i
= 0; i
< 3; i
++) {
158 if (alu
->src
[i
].sel
>= bc
->ngpr
&& alu
->src
[i
].sel
< 128) {
159 bc
->ngpr
= alu
->src
[i
].sel
+ 1;
161 /* compute how many literal are needed
162 * either 2 or 4 literals
164 if (alu
->src
[i
].sel
== 253) {
165 if (((alu
->src
[i
].chan
+ 2) & 0x6) > nalu
->nliteral
) {
166 nalu
->nliteral
= (alu
->src
[i
].chan
+ 2) & 0x6;
170 if (!LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
171 lalu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
172 if (!lalu
->last
&& lalu
->nliteral
> nalu
->nliteral
) {
173 nalu
->nliteral
= lalu
->nliteral
;
176 if (alu
->dst
.sel
>= bc
->ngpr
) {
177 bc
->ngpr
= alu
->dst
.sel
+ 1;
179 LIST_ADDTAIL(&nalu
->list
, &bc
->cf_last
->alu
);
180 /* each alu use 2 dwords */
181 bc
->cf_last
->ndw
+= 2;
186 int r600_bc_add_alu(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
)
188 return r600_bc_add_alu_type(bc
, alu
, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
);
/*
 * r600_bc_add_literal - attach literal constants to the last ALU
 * instruction of the current CF.
 *
 * bc:    container being assembled
 * value: literal values; 4 * 4 bytes are always copied from it, so it must
 *        point at that much readable data
 *
 * Visible behaviour: the function tests, in order, for no current CF, for
 * a TEX CF, for a JUMP/ELSE/POP CF, and for a CF that is neither ALU nor
 * ALU_PUSH_BEFORE or has no ALU instruction (reported through R600_ERR).
 * It then takes the last ALU instruction of the CF and tests whether it is
 * not flagged "last", needs no literal, or already has its literals.
 * Finally it copies the values, grows both the CF and the container by
 * alu->nliteral dwords, and marks the literals as added.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps. The statements executed when each test succeeds, the closing
 * braces and all return statements are missing from this copy, so the
 * result of each test cannot be stated from here.
 */
191 int r600_bc_add_literal(struct r600_bc
*bc
, const u32
*value
)
193 struct r600_bc_alu
*alu
;
/* Nothing has been emitted yet. */
195 if (bc
->cf_last
== NULL
) {
198 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_TEX
) {
201 if (bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_JUMP
||
202 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_ELSE
||
203 bc
->cf_last
->inst
== V_SQ_CF_WORD1_SQ_CF_INST_POP
) {
/* ALU CF codes are stored shifted left by three bits, hence the shifts here. */
206 if (((bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3)) &&
207 (bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3))) ||
208 LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
209 R600_ERR("last CF is not ALU (%p)\n", bc
->cf_last
);
/* The tail of the list is the most recently added ALU instruction. */
212 alu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
213 if (!alu
->last
|| !alu
->nliteral
|| alu
->literal_added
) {
/* Four 4-byte values are copied regardless of alu->nliteral. */
216 memcpy(alu
->value
, value
, 4 * 4);
217 bc
->cf_last
->ndw
+= alu
->nliteral
;
218 bc
->ndw
+= alu
->nliteral
;
219 alu
->literal_added
= 1;
/*
 * r600_bc_add_vtx - append a copy of a vertex-fetch instruction.
 *
 * bc:  container to extend
 * vtx: instruction to add; it is copied into a freshly allocated node
 *
 * Visible behaviour: a new CF with inst V_SQ_CF_WORD1_SQ_CF_INST_VTX is
 * added when there is no current CF or when the current CF is neither VTX
 * nor VTX_TC. The copy is then linked at the tail of cf_last->vtx and the
 * CF grows by four dwords.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps. The declaration of r, the allocation check, the handling of a
 * failing r600_bc_add_cf(), the closing braces and the return statements
 * are missing from this copy.
 */
223 int r600_bc_add_vtx(struct r600_bc
*bc
, const struct r600_bc_vtx
*vtx
)
225 struct r600_bc_vtx
*nvtx
= r600_bc_vtx();
/* Work on a private copy so the caller's structure is never linked in. */
230 memcpy(nvtx
, vtx
, sizeof(struct r600_bc_vtx
));
232 /* a CF can contain only ALU, only VTX or only TEX instructions */
233 if (bc
->cf_last
== NULL
||
234 (bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX
&&
235 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
)) {
236 r
= r600_bc_add_cf(bc
);
241 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_VTX
;
243 LIST_ADDTAIL(&nvtx
->list
, &bc
->cf_last
->vtx
);
244 /* each fetch uses 4 dwords */
245 bc
->cf_last
->ndw
+= 4;
/*
 * r600_bc_add_tex - append a copy of a texture-fetch instruction.
 *
 * bc:  container to extend
 * tex: instruction to add; it is copied into a freshly allocated node
 *
 * Visible behaviour: a new CF with inst V_SQ_CF_WORD1_SQ_CF_INST_TEX is
 * added when there is no current CF or when the current CF is not a TEX
 * CF. The copy is then linked at the tail of cf_last->tex and the CF grows
 * by four dwords.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps. The declaration of r, the allocation check, the handling of a
 * failing r600_bc_add_cf(), the closing braces and the return statements
 * are missing from this copy.
 */
250 int r600_bc_add_tex(struct r600_bc
*bc
, const struct r600_bc_tex
*tex
)
252 struct r600_bc_tex
*ntex
= r600_bc_tex();
/* Work on a private copy so the caller's structure is never linked in. */
257 memcpy(ntex
, tex
, sizeof(struct r600_bc_tex
));
259 /* a CF can contain only ALU, only VTX or only TEX instructions */
260 if (bc
->cf_last
== NULL
||
261 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_TEX
) {
262 r
= r600_bc_add_cf(bc
);
267 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_TEX
;
269 LIST_ADDTAIL(&ntex
->list
, &bc
->cf_last
->tex
);
270 /* each texture fetch uses 4 dwords */
271 bc
->cf_last
->ndw
+= 4;
276 int r600_bc_add_cfinst(struct r600_bc
*bc
, int inst
)
279 r
= r600_bc_add_cf(bc
);
283 bc
->cf_last
->cond
= V_SQ_CF_COND_ACTIVE
;
284 bc
->cf_last
->inst
= inst
;
288 static int r600_bc_vtx_build(struct r600_bc
*bc
, struct r600_bc_vtx
*vtx
, unsigned id
)
290 bc
->bytecode
[id
++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx
->buffer_id
) |
291 S_SQ_VTX_WORD0_SRC_GPR(vtx
->src_gpr
) |
292 S_SQ_VTX_WORD0_SRC_SEL_X(vtx
->src_sel_x
) |
293 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx
->mega_fetch_count
);
294 bc
->bytecode
[id
++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx
->dst_sel_x
) |
295 S_SQ_VTX_WORD1_DST_SEL_Y(vtx
->dst_sel_y
) |
296 S_SQ_VTX_WORD1_DST_SEL_Z(vtx
->dst_sel_z
) |
297 S_SQ_VTX_WORD1_DST_SEL_W(vtx
->dst_sel_w
) |
298 S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
299 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx
->dst_gpr
);
300 bc
->bytecode
[id
++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
301 bc
->bytecode
[id
++] = 0;
305 static int r600_bc_tex_build(struct r600_bc
*bc
, struct r600_bc_tex
*tex
, unsigned id
)
307 bc
->bytecode
[id
++] = S_SQ_TEX_WORD0_TEX_INST(tex
->inst
) |
308 S_SQ_TEX_WORD0_RESOURCE_ID(tex
->resource_id
) |
309 S_SQ_TEX_WORD0_SRC_GPR(tex
->src_gpr
) |
310 S_SQ_TEX_WORD0_SRC_REL(tex
->src_rel
);
311 bc
->bytecode
[id
++] = S_SQ_TEX_WORD1_DST_GPR(tex
->dst_gpr
) |
312 S_SQ_TEX_WORD1_DST_REL(tex
->dst_rel
) |
313 S_SQ_TEX_WORD1_DST_SEL_X(tex
->dst_sel_x
) |
314 S_SQ_TEX_WORD1_DST_SEL_Y(tex
->dst_sel_y
) |
315 S_SQ_TEX_WORD1_DST_SEL_Z(tex
->dst_sel_z
) |
316 S_SQ_TEX_WORD1_DST_SEL_W(tex
->dst_sel_w
) |
317 S_SQ_TEX_WORD1_LOD_BIAS(tex
->lod_bias
) |
318 S_SQ_TEX_WORD1_COORD_TYPE_X(tex
->coord_type_x
) |
319 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex
->coord_type_y
) |
320 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex
->coord_type_z
) |
321 S_SQ_TEX_WORD1_COORD_TYPE_W(tex
->coord_type_w
);
322 bc
->bytecode
[id
++] = S_SQ_TEX_WORD2_OFFSET_X(tex
->offset_x
) |
323 S_SQ_TEX_WORD2_OFFSET_Y(tex
->offset_y
) |
324 S_SQ_TEX_WORD2_OFFSET_Z(tex
->offset_z
) |
325 S_SQ_TEX_WORD2_SAMPLER_ID(tex
->sampler_id
) |
326 S_SQ_TEX_WORD2_SRC_SEL_X(tex
->src_sel_x
) |
327 S_SQ_TEX_WORD2_SRC_SEL_Y(tex
->src_sel_y
) |
328 S_SQ_TEX_WORD2_SRC_SEL_Z(tex
->src_sel_z
) |
329 S_SQ_TEX_WORD2_SRC_SEL_W(tex
->src_sel_w
);
330 bc
->bytecode
[id
++] = 0;
/*
 * r600_bc_alu_build - encode one ALU instruction, followed by its literal
 * values, into bc->bytecode starting at dword index id.
 *
 * bc:  container whose bytecode array receives the encoding
 * alu: instruction to encode
 * id:  index of the first dword to write
 *
 * Two encodings of the two instruction dwords are visible. The first uses
 * the OP3 field macros and carries a third source operand (src[2]). The
 * second uses the OP2 field macros and carries per-source negate and
 * absolute-value flags, the write mask and the predicate update bits.
 * After the instruction words, alu->nliteral entries of alu->value are
 * appended.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps. The test that chooses between the two encodings, any guard around
 * the literal loop, the declaration of i, the braces and the return value
 * are missing from this copy.
 * NOTE(review): the OP3 encoding does not emit src[0].neg or src[1].neg
 * while the OP2 encoding does - confirm that this is intentional.
 */
334 static int r600_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
)
338 /* don't replace gpr by pv or ps for destination register */
/* Encoding built from the OP3 field macros (three source operands). */
340 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
341 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
342 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
343 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
344 S_SQ_ALU_WORD0_LAST(alu
->last
);
345 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
346 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
347 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
348 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu
->src
[2].sel
) |
349 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu
->src
[2].chan
) |
350 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu
->src
[2].neg
) |
351 S_SQ_ALU_WORD1_OP3_ALU_INST(alu
->inst
) |
352 S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
/* Encoding built from the OP2 field macros (two source operands). */
354 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
355 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
356 S_SQ_ALU_WORD0_SRC0_NEG(alu
->src
[0].neg
) |
357 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
358 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
359 S_SQ_ALU_WORD0_SRC1_NEG(alu
->src
[1].neg
) |
360 S_SQ_ALU_WORD0_LAST(alu
->last
);
361 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
362 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
363 S_SQ_ALU_WORD1_CLAMP(alu
->dst
.clamp
) |
364 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu
->src
[0].abs
) |
365 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu
->src
[1].abs
) |
366 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu
->dst
.write
) |
367 S_SQ_ALU_WORD1_OP2_ALU_INST(alu
->inst
) |
368 S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
369 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu
->predicate
) |
370 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu
->predicate
);
/* Literal values follow the instruction words directly. */
373 for (i
= 0; i
< alu
->nliteral
; i
++) {
374 bc
->bytecode
[id
++] = alu
->value
[i
];
/*
 * r600_bc_cf_build - encode the two dwords of one CF instruction into
 * bc->bytecode, starting at index cf->id.
 *
 * bc: container whose bytecode array receives the encoding
 * cf: CF node to encode
 *
 * Visible cases:
 *  - ALU and ALU_PUSH_BEFORE (stored shifted left by three bits): address
 *    is cf->addr >> 1, instruction code is cf->inst >> 3, count is
 *    cf->ndw / 2 - 1;
 *  - TEX, VTX and VTX_TC: address is cf->addr >> 1, count is
 *    cf->ndw / 4 - 1;
 *  - EXPORT and EXPORT_DONE: every field comes from cf->output, including
 *    the instruction code (cf->output.inst);
 *  - JUMP, ELSE and POP: address is cf->cf_addr >> 1, with the condition
 *    and pop count taken from the node;
 *  - any other value is reported through R600_ERR.
 *
 * NOTE(review): the leading integers look like extraction residue and have
 * gaps. The switch statement that these case labels belong to, the
 * statements that end each case, the braces and the return values are
 * missing from this copy.
 */
380 static int r600_bc_cf_build(struct r600_bc
*bc
, struct r600_bc_cf
*cf
)
/* Both CF dwords are written at the slot reserved for this node. */
382 unsigned id
= cf
->id
;
385 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
386 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
387 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD0_ADDR(cf
->addr
>> 1);
388 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD1_CF_INST(cf
->inst
>> 3) |
389 S_SQ_CF_ALU_WORD1_BARRIER(1) |
390 S_SQ_CF_ALU_WORD1_COUNT((cf
->ndw
/ 2) - 1);
392 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
393 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
394 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
395 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->addr
>> 1);
396 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
397 S_SQ_CF_WORD1_BARRIER(1) |
398 S_SQ_CF_WORD1_COUNT((cf
->ndw
/ 4) - 1);
400 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
401 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
402 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf
->output
.gpr
) |
403 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf
->output
.elem_size
) |
404 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf
->output
.array_base
) |
405 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf
->output
.type
);
406 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf
->output
.swizzle_x
) |
407 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf
->output
.swizzle_y
) |
408 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf
->output
.swizzle_z
) |
409 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf
->output
.swizzle_w
) |
410 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf
->output
.barrier
) |
411 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf
->output
.inst
) |
412 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf
->output
.end_of_program
);
414 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
415 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
416 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
417 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->cf_addr
>> 1);
418 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
419 S_SQ_CF_WORD1_BARRIER(1) |
420 S_SQ_CF_WORD1_COND(cf
->cond
) |
421 S_SQ_CF_WORD1_POP_COUNT(cf
->pop_count
);
425 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
431 int r600_bc_build(struct r600_bc
*bc
)
433 struct r600_bc_cf
*cf
;
434 struct r600_bc_alu
*alu
;
435 struct r600_bc_vtx
*vtx
;
436 struct r600_bc_tex
*tex
;
440 /* first path compute addr of each CF block */
441 /* addr start after all the CF instructions */
442 addr
= bc
->cf_last
->id
+ 2;
443 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
445 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
446 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
448 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
449 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
450 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
451 /* fetch node need to be 16 bytes aligned*/
453 addr
&= 0xFFFFFFFCUL
;
455 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
456 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
458 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
459 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
460 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
465 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
470 bc
->ndw
= cf
->addr
+ cf
->ndw
;
473 bc
->bytecode
= calloc(1, bc
->ndw
* 4);
474 if (bc
->bytecode
== NULL
)
476 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
478 r
= r600_bc_cf_build(bc
, cf
);
482 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
483 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE
<< 3):
484 LIST_FOR_EACH_ENTRY(alu
, &cf
->alu
, list
) {
485 switch(bc
->chiprev
) {
487 r
= r600_bc_alu_build(bc
, alu
, addr
);
490 r
= r700_bc_alu_build(bc
, alu
, addr
);
493 R600_ERR("unknown family %d\n", bc
->family
);
500 addr
+= alu
->nliteral
;
504 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
505 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
506 LIST_FOR_EACH_ENTRY(vtx
, &cf
->vtx
, list
) {
507 r
= r600_bc_vtx_build(bc
, vtx
, addr
);
513 case V_SQ_CF_WORD1_SQ_CF_INST_TEX
:
514 LIST_FOR_EACH_ENTRY(tex
, &cf
->tex
, list
) {
515 r
= r600_bc_tex_build(bc
, tex
, addr
);
521 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
522 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
523 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP
:
524 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE
:
525 case V_SQ_CF_WORD1_SQ_CF_INST_POP
:
528 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);