2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "r600_context.h"
25 #include "util/u_memory.h"
/*
 * Forward declaration of the R700 flavour of the ALU encoder.  Its
 * definition is not in the visible part of this file; it is called from
 * r600_bc_build() with the same (bc, alu, id) arguments as
 * r600_bc_alu_build(), inside a switch on bc->family.
 */
30 int r700_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
);
/*
 * Allocate a struct r600_bc_cf with CALLOC_STRUCT and initialise its three
 * list heads: 'list' (link used to chain the CF into bc->cf), 'alu' and
 * 'vtx' (the per-CF instruction lists filled by r600_bc_add_alu() and
 * r600_bc_add_vtx()).
 *
 * NOTE(review): this listing omits some original lines (the embedded line
 * numbers skip 35-37 and 41-42), so the allocation-failure check and the
 * return statement are not visible here -- confirm against the full file.
 */
32 static struct r600_bc_cf
*r600_bc_cf(void)
34 struct r600_bc_cf
*cf
= CALLOC_STRUCT(r600_bc_cf
);
38 LIST_INITHEAD(&cf
->list
);
39 LIST_INITHEAD(&cf
->alu
);
40 LIST_INITHEAD(&cf
->vtx
);
/*
 * Allocate a struct r600_bc_alu with CALLOC_STRUCT and initialise its
 * 'list' link, which r600_bc_add_alu() uses to append the node to a CF's
 * 'alu' list.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing -- confirm against the full file.
 */
44 static struct r600_bc_alu
*r600_bc_alu(void)
46 struct r600_bc_alu
*alu
= CALLOC_STRUCT(r600_bc_alu
);
50 LIST_INITHEAD(&alu
->list
);
/*
 * Allocate a struct r600_bc_vtx with CALLOC_STRUCT and initialise its
 * 'list' link, which r600_bc_add_vtx() uses to append the node to a CF's
 * 'vtx' list.
 *
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing -- confirm against the full file.
 */
54 static struct r600_bc_vtx
*r600_bc_vtx(void)
56 struct r600_bc_vtx
*vtx
= CALLOC_STRUCT(r600_bc_vtx
);
60 LIST_INITHEAD(&vtx
->list
);
/*
 * Prepare a bytecode container for use: the only statement visible here
 * initialises bc->cf as an empty list of CF blocks.
 *
 * NOTE(review): what is done with 'family' and the return value are not
 * visible in this listing; presumably 'family' is stored in bc->family,
 * which r600_bc_build() switches on -- confirm against the full file.
 */
64 int r600_bc_init(struct r600_bc
*bc
, enum radeon_family family
)
66 LIST_INITHEAD(&bc
->cf
);
/*
 * Create a new CF block with r600_bc_cf() and append it to the tail of
 * bc->cf.  The new block's id is the id of the previous last CF plus 2;
 * r600_bc_cf_build() uses cf->id as the bytecode index of the CF
 * instruction and writes two dwords there, hence the stride of 2.
 *
 * NOTE(review): the allocation-failure check, any guard around the
 * bc->cf_last dereference for the first CF, the update of bc->cf_last and
 * the return statement are not visible in this listing -- confirm against
 * the full file.
 */
71 static int r600_bc_add_cf(struct r600_bc
*bc
)
73 struct r600_bc_cf
*cf
= r600_bc_cf();
77 LIST_ADDTAIL(&cf
->list
, &bc
->cf
);
79 cf
->id
= bc
->cf_last
->id
+ 2;
/*
 * Append an export/output CF block: a fresh CF is added with
 * r600_bc_add_cf(), its instruction code is taken from output->inst and
 * the whole *output descriptor is copied by value into cf_last->output
 * (the caller keeps ownership of 'output').
 *
 * NOTE(review): the handling of the r600_bc_add_cf() result 'r' and the
 * return statement are not visible in this listing.
 */
86 int r600_bc_add_output(struct r600_bc
*bc
, const struct r600_bc_output
*output
)
90 r
= r600_bc_add_cf(bc
);
93 bc
->cf_last
->inst
= output
->inst
;
94 memcpy(&bc
->cf_last
->output
, output
, sizeof(struct r600_bc_output
));
/*
 * Append a copy of *alu to the current ALU CF block, opening a new ALU CF
 * block first when there is no CF yet or the last CF is not an ALU one.
 *
 * Visible steps:
 *  - a new node 'nalu' is allocated and *alu is copied into it;
 *  - for each of the three sources, a selector in [bc->ngpr, 128) raises
 *    bc->ngpr to sel + 1 (selectors below 128 are presumably GPR indices
 *    -- TODO confirm);
 *  - a source selector of 253 is treated as a literal reference:
 *    (chan + 2) & 0x6 evaluates to 2 for chan 0/1 and to 4 for chan 2/3,
 *    and nalu->nliteral is raised to that value;
 *  - if the previous ALU of this CF is not flagged 'last' and needs more
 *    literals, its nliteral count is inherited by the new node;
 *  - the destination selector can raise bc->ngpr in the same way;
 *  - the node is appended to cf_last->alu and the CF grows by 2 dwords.
 *
 * NOTE(review): this listing omits some original lines, among them the
 * terminator of the "compute how many literal are needed" comment, the
 * allocation-failure check, the handling of 'r' and the return statement.
 * No comments are added in that region so the text is not altered further.
 */
98 int r600_bc_add_alu(struct r600_bc
*bc
, const struct r600_bc_alu
*alu
)
100 struct r600_bc_alu
*nalu
= r600_bc_alu();
101 struct r600_bc_alu
*lalu
;
106 memcpy(nalu
, alu
, sizeof(struct r600_bc_alu
));
109 /* a CF block can contain only ALU, only VTX or only TEX instructions */
110 if (bc
->cf_last
== NULL
|| bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3)) {
111 r
= r600_bc_add_cf(bc
);
116 bc
->cf_last
->inst
= V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3;
118 /* ngpr == highest gpr index used by any alu, plus one */
119 for (i
= 0; i
< 3; i
++) {
120 if (alu
->src
[i
].sel
>= bc
->ngpr
&& alu
->src
[i
].sel
< 128) {
121 bc
->ngpr
= alu
->src
[i
].sel
+ 1;
123 /* compute how many literal are needed
124 * either 2 or 4 literals
126 if (alu
->src
[i
].sel
== 253) {
127 if (((alu
->src
[i
].chan
+ 2) & 0x6) > nalu
->nliteral
) {
128 nalu
->nliteral
= (alu
->src
[i
].chan
+ 2) & 0x6;
132 if (!LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
133 lalu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
134 if (!lalu
->last
&& lalu
->nliteral
> nalu
->nliteral
) {
135 nalu
->nliteral
= lalu
->nliteral
;
138 if (alu
->dst
.sel
>= bc
->ngpr
) {
139 bc
->ngpr
= alu
->dst
.sel
+ 1;
141 LIST_ADDTAIL(&nalu
->list
, &bc
->cf_last
->alu
);
142 /* each alu use 2 dwords */
143 bc
->cf_last
->ndw
+= 2;
/*
 * Attach literal constants to the most recently added ALU instruction.
 *
 * The call is rejected with an R600_ERR message unless the last CF exists,
 * is an ALU CF and has at least one ALU instruction.  The last ALU of that
 * CF is then looked up; the branch taken when it is not flagged 'last' or
 * has nliteral == 0 is not visible in this listing (presumably an early
 * return -- TODO confirm).  Otherwise 16 bytes (4 * 4) are copied from
 * 'value' into alu->value regardless of nliteral, so 'value' must point to
 * at least four u32, and both the CF and the whole program grow by
 * alu->nliteral dwords.
 */
148 int r600_bc_add_literal(struct r600_bc
*bc
, const u32
*value
)
150 struct r600_bc_alu
*alu
;
152 if (bc
->cf_last
== NULL
||
153 bc
->cf_last
->inst
!= (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3) ||
154 LIST_IS_EMPTY(&bc
->cf_last
->alu
)) {
155 R600_ERR("last CF is not ALU (%p)\n", bc
->cf_last
);
/* the literals belong to the last ALU instruction of the current CF */
158 alu
= LIST_ENTRY(struct r600_bc_alu
, bc
->cf_last
->alu
.prev
, list
);
159 if (!alu
->last
|| !alu
->nliteral
) {
/* always copies four dwords; only nliteral of them are accounted below */
162 memcpy(alu
->value
, value
, 4 * 4);
163 bc
->cf_last
->ndw
+= alu
->nliteral
;
164 bc
->ndw
+= alu
->nliteral
;
/*
 * Append a copy of *vtx to the current vertex-fetch CF block, opening a
 * new VTX CF block first when there is no CF yet or the last CF is neither
 * a VTX nor a VTX_TC one.  The node is appended to cf_last->vtx and the CF
 * grows by 4 dwords, matching the four words that r600_bc_vtx_build()
 * emits per fetch.
 *
 * NOTE(review): the allocation-failure check, the handling of 'r' and the
 * return statement are not visible in this listing.
 */
168 int r600_bc_add_vtx(struct r600_bc
*bc
, const struct r600_bc_vtx
*vtx
)
170 struct r600_bc_vtx
*nvtx
= r600_bc_vtx();
175 memcpy(nvtx
, vtx
, sizeof(struct r600_bc_vtx
));
177 /* a CF block can contain only ALU, only VTX or only TEX instructions */
178 if (bc
->cf_last
== NULL
||
179 (bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX
&&
180 bc
->cf_last
->inst
!= V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
)) {
181 r
= r600_bc_add_cf(bc
);
186 bc
->cf_last
->inst
= V_SQ_CF_WORD1_SQ_CF_INST_VTX
;
188 LIST_ADDTAIL(&nvtx
->list
, &bc
->cf_last
->vtx
);
189 /* each fetch uses 4 dwords (see r600_bc_vtx_build) */
190 bc
->cf_last
->ndw
+= 4;
/*
 * Encode one vertex-fetch instruction into bc->bytecode starting at index
 * 'id'.  Four consecutive dwords are written:
 *   word0: buffer id, source GPR, source X selector, mega-fetch count;
 *   word1: destination X/Y/Z/W selectors, USE_CONST_FIELDS forced to 1,
 *          destination GPR;
 *   word2: MEGA_FETCH forced to 1;
 *   word3: zero.
 * 'id' is a by-value parameter, so the caller's index is not advanced.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
195 int r600_bc_vtx_build(struct r600_bc
*bc
, struct r600_bc_vtx
*vtx
, unsigned id
)
197 bc
->bytecode
[id
++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx
->buffer_id
) |
198 S_SQ_VTX_WORD0_SRC_GPR(vtx
->src_gpr
) |
199 S_SQ_VTX_WORD0_SRC_SEL_X(vtx
->src_sel_x
) |
200 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx
->mega_fetch_count
);
201 bc
->bytecode
[id
++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx
->dst_sel_x
) |
202 S_SQ_VTX_WORD1_DST_SEL_Y(vtx
->dst_sel_y
) |
203 S_SQ_VTX_WORD1_DST_SEL_Z(vtx
->dst_sel_z
) |
204 S_SQ_VTX_WORD1_DST_SEL_W(vtx
->dst_sel_w
) |
205 S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
206 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx
->dst_gpr
);
207 bc
->bytecode
[id
++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
/* fourth dword is padding: written as zero */
208 bc
->bytecode
[id
++] = 0;
/*
 * Encode one ALU instruction into bc->bytecode starting at index 'id':
 * two instruction dwords followed by alu->nliteral literal dwords taken
 * from alu->value.
 *
 * Two encodings are visible below.  The first uses the OP3 fields of
 * word1 (third source selector/channel/negate) and does not emit the
 * SRC0/SRC1 negate bits in word0; the second uses the OP2 fields (source
 * abs modifiers and destination write mask) and does emit SRC0/SRC1
 * negate.  Both force BANK_SWIZZLE to 0.
 *
 * NOTE(review): the condition selecting between the two encodings and the
 * return statement are not visible in this listing (presumably a test on
 * an "is three-operand" flag of the instruction -- TODO confirm).
 */
212 int r600_bc_alu_build(struct r600_bc
*bc
, struct r600_bc_alu
*alu
, unsigned id
)
216 /* don't replace gpr by pv or ps for destination register */
/* three-operand (OP3) encoding */
218 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
219 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
220 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
221 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
222 S_SQ_ALU_WORD0_LAST(alu
->last
);
223 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
224 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
225 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu
->src
[2].sel
) |
226 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu
->src
[2].chan
) |
227 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu
->src
[2].neg
) |
228 S_SQ_ALU_WORD1_OP3_ALU_INST(alu
->inst
) |
229 S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
/* two-operand (OP2) encoding */
231 bc
->bytecode
[id
++] = S_SQ_ALU_WORD0_SRC0_SEL(alu
->src
[0].sel
) |
232 S_SQ_ALU_WORD0_SRC0_CHAN(alu
->src
[0].chan
) |
233 S_SQ_ALU_WORD0_SRC0_NEG(alu
->src
[0].neg
) |
234 S_SQ_ALU_WORD0_SRC1_SEL(alu
->src
[1].sel
) |
235 S_SQ_ALU_WORD0_SRC1_CHAN(alu
->src
[1].chan
) |
236 S_SQ_ALU_WORD0_SRC1_NEG(alu
->src
[1].neg
) |
237 S_SQ_ALU_WORD0_LAST(alu
->last
);
238 bc
->bytecode
[id
++] = S_SQ_ALU_WORD1_DST_GPR(alu
->dst
.sel
) |
239 S_SQ_ALU_WORD1_DST_CHAN(alu
->dst
.chan
) |
240 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu
->src
[0].abs
) |
241 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu
->src
[1].abs
) |
242 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu
->dst
.write
) |
243 S_SQ_ALU_WORD1_OP2_ALU_INST(alu
->inst
) |
244 S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
/* literal constants follow the two instruction dwords */
247 for (i
= 0; i
< alu
->nliteral
; i
++) {
248 bc
->bytecode
[id
++] = alu
->value
[i
];
/*
 * Encode the two-dword CF instruction of 'cf' into bc->bytecode at index
 * cf->id.  Three instruction groups are handled by the visible case
 * labels:
 *   - ALU:    word0 = cf->addr >> 1; word1 = instruction code (cf->inst
 *             shifted back right by 3), barrier set, count = ndw / 2 - 1;
 *   - VTX / VTX_TC: word0 = cf->addr >> 1; word1 = instruction code,
 *             barrier set, count = ndw / 4 - 1 (four dwords per fetch);
 *   - EXPORT / EXPORT_DONE: both words are built from cf->output.
 * Any other instruction code is reported with R600_ERR.
 *
 * NOTE(review): the switch header, the break statements and the return
 * statements are not visible in this listing.  The ALU count is derived
 * from cf->ndw, which r600_bc_add_literal() also increases by the literal
 * count -- whether that is intended cannot be confirmed from here.
 */
254 int r600_bc_cf_build(struct r600_bc
*bc
, struct r600_bc_cf
*cf
)
256 unsigned id
= cf
->id
;
259 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
260 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD0_ADDR(cf
->addr
>> 1);
261 bc
->bytecode
[id
++] = S_SQ_CF_ALU_WORD1_CF_INST(cf
->inst
>> 3) |
262 S_SQ_CF_ALU_WORD1_BARRIER(1) |
263 S_SQ_CF_ALU_WORD1_COUNT((cf
->ndw
/ 2) - 1);
265 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
266 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
267 bc
->bytecode
[id
++] = S_SQ_CF_WORD0_ADDR(cf
->addr
>> 1);
268 bc
->bytecode
[id
++] = S_SQ_CF_WORD1_CF_INST(cf
->inst
) |
269 S_SQ_CF_WORD1_BARRIER(1) |
270 S_SQ_CF_WORD1_COUNT((cf
->ndw
/ 4) - 1);
272 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
273 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
274 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf
->output
.gpr
) |
275 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf
->output
.elem_size
) |
276 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf
->output
.array_base
) |
277 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf
->output
.type
);
278 bc
->bytecode
[id
++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf
->output
.swizzle_x
) |
279 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf
->output
.swizzle_y
) |
280 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf
->output
.swizzle_z
) |
281 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf
->output
.swizzle_w
) |
282 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf
->output
.barrier
) |
283 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf
->output
.inst
) |
284 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf
->output
.end_of_program
);
287 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
/*
 * Assemble the final bytecode array for 'bc' in two passes over bc->cf.
 *
 * Pass 1 lays out the program: the running address starts right after the
 * last CF instruction (cf_last->id + 2), is rounded for VTX/VTX_TC blocks
 * by masking with 0xFFFFFFFC (a multiple of 4 dwords, i.e. 16 bytes), and
 * bc->ndw is updated to the end of each block (cf->addr + cf->ndw).
 *
 * The buffer is then allocated zero-filled with calloc (bc->ndw * 4
 * bytes).
 *
 * Pass 2 emits each CF instruction with r600_bc_cf_build() and then its
 * payload: ALU blocks go through r600_bc_alu_build() or
 * r700_bc_alu_build() depending on bc->family, VTX blocks through
 * r600_bc_vtx_build().  Unknown families and unsupported CF instructions
 * are reported with R600_ERR.
 *
 * NOTE(review): this listing omits many original lines (switch headers,
 * the family case labels, the "addr += 3" style rounding step that
 * normally precedes the mask, the assignments to cf->addr, most address
 * increments, error returns and the end of the function), and
 * bc->cf_last is dereferenced without a visible NULL check -- confirm all
 * of these against the full file.
 */
293 int r600_bc_build(struct r600_bc
*bc
)
295 struct r600_bc_cf
*cf
;
296 struct r600_bc_alu
*alu
;
297 struct r600_bc_vtx
*vtx
;
302 /* first pass: compute the address of each CF block */
303 /* addresses start after all the CF instructions */
304 addr
= bc
->cf_last
->id
+ 2;
305 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
307 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
309 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
310 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
311 /* fetch nodes need to be 16-byte (4-dword) aligned */
313 addr
&= 0xFFFFFFFCUL
;
315 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
316 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
319 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);
/* total program size so far: end of the block just laid out */
324 bc
->ndw
= cf
->addr
+ cf
->ndw
;
/* ndw counts dwords, hence the factor of 4 for the size in bytes */
327 bc
->bytecode
= calloc(1, bc
->ndw
* 4);
328 if (bc
->bytecode
== NULL
)
/* second pass: emit each CF instruction followed by its payload */
330 LIST_FOR_EACH_ENTRY(cf
, &bc
->cf
, list
) {
332 r
= r600_bc_cf_build(bc
, cf
);
336 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU
<< 3):
337 LIST_FOR_EACH_ENTRY(alu
, &cf
->alu
, list
) {
338 switch (bc
->family
) {
347 r
= r600_bc_alu_build(bc
, alu
, addr
);
353 r
= r700_bc_alu_build(bc
, alu
, addr
);
356 R600_ERR("unknown family %d\n", bc
->family
);
/* skip over the literal dwords emitted after this instruction */
363 addr
+= alu
->nliteral
;
367 case V_SQ_CF_WORD1_SQ_CF_INST_VTX
:
368 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC
:
369 LIST_FOR_EACH_ENTRY(vtx
, &cf
->vtx
, list
) {
370 r
= r600_bc_vtx_build(bc
, vtx
, addr
);
376 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT
:
377 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE
:
380 R600_ERR("unsupported CF instruction (0x%X)\n", cf
->inst
);