r600g: drop compiler stuff and switch over to dumb tgsi assembler
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "r600_asm.h"
24 #include "r600_context.h"
25 #include "util/u_memory.h"
26 #include "r600_sq.h"
27 #include <stdio.h>
28 #include <errno.h>
29
/* ALU encoder used by r600_bc_build() for the CHIP_RV770, CHIP_RV730,
 * CHIP_RV710 and CHIP_RV740 families; its definition is not in this file. */
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
31
32 static struct r600_bc_cf *r600_bc_cf(void)
33 {
34 struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf);
35
36 if (cf == NULL)
37 return NULL;
38 LIST_INITHEAD(&cf->list);
39 LIST_INITHEAD(&cf->alu);
40 LIST_INITHEAD(&cf->vtx);
41 return cf;
42 }
43
44 static struct r600_bc_alu *r600_bc_alu(void)
45 {
46 struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu);
47
48 if (alu == NULL)
49 return NULL;
50 LIST_INITHEAD(&alu->list);
51 return alu;
52 }
53
54 static struct r600_bc_vtx *r600_bc_vtx(void)
55 {
56 struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx);
57
58 if (vtx == NULL)
59 return NULL;
60 LIST_INITHEAD(&vtx->list);
61 return vtx;
62 }
63
64 int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
65 {
66 LIST_INITHEAD(&bc->cf);
67 bc->family = family;
68 return 0;
69 }
70
71 static int r600_bc_add_cf(struct r600_bc *bc)
72 {
73 struct r600_bc_cf *cf = r600_bc_cf();
74
75 if (cf == NULL)
76 return -ENOMEM;
77 LIST_ADDTAIL(&cf->list, &bc->cf);
78 if (bc->cf_last)
79 cf->id = bc->cf_last->id + 2;
80 bc->cf_last = cf;
81 bc->ncf++;
82 bc->ndw += 2;
83 return 0;
84 }
85
86 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
87 {
88 int r;
89
90 r = r600_bc_add_cf(bc);
91 if (r)
92 return r;
93 bc->cf_last->inst = output->inst;
94 memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
95 return 0;
96 }
97
98 int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
99 {
100 struct r600_bc_alu *nalu = r600_bc_alu();
101 struct r600_bc_alu *lalu;
102 int i, r;
103
104 if (nalu == NULL)
105 return -ENOMEM;
106 memcpy(nalu, alu, sizeof(struct r600_bc_alu));
107 nalu->nliteral = 0;
108
109 /* cf can contains only alu or only vtx or only tex */
110 if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) {
111 r = r600_bc_add_cf(bc);
112 if (r) {
113 free(nalu);
114 return r;
115 }
116 bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
117 }
118 /* number of gpr == the last gpr used in any alu */
119 for (i = 0; i < 3; i++) {
120 if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
121 bc->ngpr = alu->src[i].sel + 1;
122 }
123 /* compute how many literal are needed
124 * either 2 or 4 literals
125 */
126 if (alu->src[i].sel == 253) {
127 if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
128 nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
129 }
130 }
131 }
132 if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
133 lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
134 if (!lalu->last && lalu->nliteral > nalu->nliteral) {
135 nalu->nliteral = lalu->nliteral;
136 }
137 }
138 if (alu->dst.sel >= bc->ngpr) {
139 bc->ngpr = alu->dst.sel + 1;
140 }
141 LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
142 /* each alu use 2 dwords */
143 bc->cf_last->ndw += 2;
144 bc->ndw += 2;
145 return 0;
146 }
147
148 int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
149 {
150 struct r600_bc_alu *alu;
151
152 if (bc->cf_last == NULL ||
153 bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
154 LIST_IS_EMPTY(&bc->cf_last->alu)) {
155 R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
156 return -EINVAL;
157 }
158 alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
159 if (!alu->last || !alu->nliteral) {
160 return 0;
161 }
162 memcpy(alu->value, value, 4 * 4);
163 bc->cf_last->ndw += alu->nliteral;
164 bc->ndw += alu->nliteral;
165 return 0;
166 }
167
168 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
169 {
170 struct r600_bc_vtx *nvtx = r600_bc_vtx();
171 int r;
172
173 if (nvtx == NULL)
174 return -ENOMEM;
175 memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
176
177 /* cf can contains only alu or only vtx or only tex */
178 if (bc->cf_last == NULL ||
179 (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
180 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)) {
181 r = r600_bc_add_cf(bc);
182 if (r) {
183 free(nvtx);
184 return r;
185 }
186 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
187 }
188 LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
189 /* each fetch use 6 dwords */
190 bc->cf_last->ndw += 4;
191 bc->ndw += 4;
192 return 0;
193 }
194
195 int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
196 {
197 bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
198 S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
199 S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
200 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
201 bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
202 S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
203 S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
204 S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
205 S_SQ_VTX_WORD1_USE_CONST_FIELDS(1) |
206 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
207 bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
208 bc->bytecode[id++] = 0;
209 return 0;
210 }
211
212 int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
213 {
214 unsigned i;
215
216 /* don't replace gpr by pv or ps for destination register */
217 if (alu->is_op3) {
218 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
219 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
220 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
221 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
222 S_SQ_ALU_WORD0_LAST(alu->last);
223 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
224 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
225 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
226 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
227 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
228 S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
229 S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
230 } else {
231 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
232 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
233 S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
234 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
235 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
236 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
237 S_SQ_ALU_WORD0_LAST(alu->last);
238 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
239 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
240 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
241 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
242 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
243 S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
244 S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
245 }
246 if (alu->last) {
247 for (i = 0; i < alu->nliteral; i++) {
248 bc->bytecode[id++] = alu->value[i];
249 }
250 }
251 return 0;
252 }
253
254 int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
255 {
256 unsigned id = cf->id;
257
258 switch (cf->inst) {
259 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
260 bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
261 bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
262 S_SQ_CF_ALU_WORD1_BARRIER(1) |
263 S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
264 break;
265 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
266 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
267 bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
268 bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
269 S_SQ_CF_WORD1_BARRIER(1) |
270 S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
271 break;
272 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
273 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
274 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
275 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
276 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
277 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
278 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
279 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
280 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
281 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
282 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
283 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
284 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
285 break;
286 default:
287 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
288 return -EINVAL;
289 }
290 return 0;
291 }
292
293 int r600_bc_build(struct r600_bc *bc)
294 {
295 struct r600_bc_cf *cf;
296 struct r600_bc_alu *alu;
297 struct r600_bc_vtx *vtx;
298 unsigned addr;
299 int r;
300
301
302 /* first path compute addr of each CF block */
303 /* addr start after all the CF instructions */
304 addr = bc->cf_last->id + 2;
305 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
306 switch (cf->inst) {
307 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
308 break;
309 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
310 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
311 /* fetch node need to be 16 bytes aligned*/
312 addr += 3;
313 addr &= 0xFFFFFFFCUL;
314 break;
315 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
316 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
317 break;
318 default:
319 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
320 return -EINVAL;
321 }
322 cf->addr = addr;
323 addr += cf->ndw;
324 bc->ndw = cf->addr + cf->ndw;
325 }
326 free(bc->bytecode);
327 bc->bytecode = calloc(1, bc->ndw * 4);
328 if (bc->bytecode == NULL)
329 return -ENOMEM;
330 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
331 addr = cf->addr;
332 r = r600_bc_cf_build(bc, cf);
333 if (r)
334 return r;
335 switch (cf->inst) {
336 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
337 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
338 switch (bc->family) {
339 case CHIP_R600:
340 case CHIP_RV610:
341 case CHIP_RV630:
342 case CHIP_RV670:
343 case CHIP_RV620:
344 case CHIP_RV635:
345 case CHIP_RS780:
346 case CHIP_RS880:
347 r = r600_bc_alu_build(bc, alu, addr);
348 break;
349 case CHIP_RV770:
350 case CHIP_RV730:
351 case CHIP_RV710:
352 case CHIP_RV740:
353 r = r700_bc_alu_build(bc, alu, addr);
354 break;
355 default:
356 R600_ERR("unknown family %d\n", bc->family);
357 return -EINVAL;
358 }
359 if (r)
360 return r;
361 addr += 2;
362 if (alu->last) {
363 addr += alu->nliteral;
364 }
365 }
366 break;
367 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
368 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
369 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
370 r = r600_bc_vtx_build(bc, vtx, addr);
371 if (r)
372 return r;
373 addr += 4;
374 }
375 break;
376 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
377 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
378 break;
379 default:
380 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
381 return -EINVAL;
382 }
383 }
384 return 0;
385 }