r600g: improve r600_bc_dump
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25 #include "util/u_format.h"
26 #include "util/u_memory.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "r600_pipe.h"
29 #include "r600_sq.h"
30 #include "r600_opcodes.h"
31 #include "r600_asm.h"
32 #include "r600_formats.h"
33 #include "r600d.h"
34
35 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
36 {
37 if(alu->is_op3)
38 return 3;
39
40 switch (alu->inst) {
41 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
42 return 0;
43 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
44 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
45 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
46 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
47 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
48 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
49 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
50 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
51 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
52 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
53 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
54 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
55 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
56 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
57 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
58 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
59 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
60 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
61 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
62 return 2;
63
64 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
65 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
66 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
67 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
68 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
69 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
70 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
71 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
72 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
73 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
74 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
75 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
76 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
77 return 1;
78 default: R600_ERR(
79 "Need instruction operand number for 0x%x.\n", alu->inst);
80 };
81
82 return 3;
83 }
84
85 int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
86
87 static struct r600_bc_cf *r600_bc_cf(void)
88 {
89 struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf);
90
91 if (cf == NULL)
92 return NULL;
93 LIST_INITHEAD(&cf->list);
94 LIST_INITHEAD(&cf->alu);
95 LIST_INITHEAD(&cf->vtx);
96 LIST_INITHEAD(&cf->tex);
97 return cf;
98 }
99
100 static struct r600_bc_alu *r600_bc_alu(void)
101 {
102 struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu);
103
104 if (alu == NULL)
105 return NULL;
106 LIST_INITHEAD(&alu->list);
107 LIST_INITHEAD(&alu->bs_list);
108 return alu;
109 }
110
111 static struct r600_bc_vtx *r600_bc_vtx(void)
112 {
113 struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx);
114
115 if (vtx == NULL)
116 return NULL;
117 LIST_INITHEAD(&vtx->list);
118 return vtx;
119 }
120
121 static struct r600_bc_tex *r600_bc_tex(void)
122 {
123 struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex);
124
125 if (tex == NULL)
126 return NULL;
127 LIST_INITHEAD(&tex->list);
128 return tex;
129 }
130
131 int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
132 {
133 LIST_INITHEAD(&bc->cf);
134 bc->family = family;
135 switch (bc->family) {
136 case CHIP_R600:
137 case CHIP_RV610:
138 case CHIP_RV630:
139 case CHIP_RV670:
140 case CHIP_RV620:
141 case CHIP_RV635:
142 case CHIP_RS780:
143 case CHIP_RS880:
144 bc->chiprev = CHIPREV_R600;
145 break;
146 case CHIP_RV770:
147 case CHIP_RV730:
148 case CHIP_RV710:
149 case CHIP_RV740:
150 bc->chiprev = CHIPREV_R700;
151 break;
152 case CHIP_CEDAR:
153 case CHIP_REDWOOD:
154 case CHIP_JUNIPER:
155 case CHIP_CYPRESS:
156 case CHIP_HEMLOCK:
157 case CHIP_PALM:
158 bc->chiprev = CHIPREV_EVERGREEN;
159 break;
160 default:
161 R600_ERR("unknown family %d\n", bc->family);
162 return -EINVAL;
163 }
164 return 0;
165 }
166
167 static int r600_bc_add_cf(struct r600_bc *bc)
168 {
169 struct r600_bc_cf *cf = r600_bc_cf();
170
171 if (cf == NULL)
172 return -ENOMEM;
173 LIST_ADDTAIL(&cf->list, &bc->cf);
174 if (bc->cf_last)
175 cf->id = bc->cf_last->id + 2;
176 bc->cf_last = cf;
177 bc->ncf++;
178 bc->ndw += 2;
179 bc->force_add_cf = 0;
180 return 0;
181 }
182
183 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
184 {
185 int r;
186
187 r = r600_bc_add_cf(bc);
188 if (r)
189 return r;
190 bc->cf_last->inst = output->inst;
191 memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
192 return 0;
193 }
194
195 const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000
196 SQ_ALU_VEC_120, //001
197 SQ_ALU_VEC_102, //010
198
199 SQ_ALU_VEC_201, //011
200 SQ_ALU_VEC_012, //100
201 SQ_ALU_VEC_021, //101
202
203 SQ_ALU_VEC_012, //110
204 SQ_ALU_VEC_012}; //111
205
206 const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000
207 SQ_ALU_SCL_122, //001
208 SQ_ALU_SCL_122, //010
209
210 SQ_ALU_SCL_221, //011
211 SQ_ALU_SCL_212, //100
212 SQ_ALU_SCL_122, //101
213
214 SQ_ALU_SCL_122, //110
215 SQ_ALU_SCL_122}; //111
216
217 static int init_gpr(struct r600_bc_alu *alu)
218 {
219 int cycle, component;
220 /* set up gpr use */
221 for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
222 for (component = 0; component < NUM_OF_COMPONENTS; component++)
223 alu->hw_gpr[cycle][component] = -1;
224 return 0;
225 }
226
227 #if 0
228 static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
229 {
230 if (alu->hw_gpr[cycle][chan] < 0)
231 alu->hw_gpr[cycle][chan] = sel;
232 else if (alu->hw_gpr[cycle][chan] != (int)sel) {
233 R600_ERR("Another scalar operation has already used GPR read port for channel\n");
234 return -1;
235 }
236 return 0;
237 }
238
239 static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
240 {
241 int table[3];
242 int ret = 0;
243 switch (swiz) {
244 case SQ_ALU_SCL_210:
245 table[0] = 2; table[1] = 1; table[2] = 0;
246 *p_cycle = table[sel];
247 break;
248 case SQ_ALU_SCL_122:
249 table[0] = 1; table[1] = 2; table[2] = 2;
250 *p_cycle = table[sel];
251 break;
252 case SQ_ALU_SCL_212:
253 table[0] = 2; table[1] = 1; table[2] = 2;
254 *p_cycle = table[sel];
255 break;
256 case SQ_ALU_SCL_221:
257 table[0] = 2; table[1] = 2; table[2] = 1;
258 *p_cycle = table[sel];
259 break;
260 break;
261 default:
262 R600_ERR("bad scalar bank swizzle value\n");
263 ret = -1;
264 break;
265 }
266 return ret;
267 }
268
269 static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
270 {
271 int table[3];
272 int ret;
273
274 switch (swiz) {
275 case SQ_ALU_VEC_012:
276 table[0] = 0; table[1] = 1; table[2] = 2;
277 *p_cycle = table[sel];
278 break;
279 case SQ_ALU_VEC_021:
280 table[0] = 0; table[1] = 2; table[2] = 1;
281 *p_cycle = table[sel];
282 break;
283 case SQ_ALU_VEC_120:
284 table[0] = 1; table[1] = 2; table[2] = 0;
285 *p_cycle = table[sel];
286 break;
287 case SQ_ALU_VEC_102:
288 table[0] = 1; table[1] = 0; table[2] = 2;
289 *p_cycle = table[sel];
290 break;
291 case SQ_ALU_VEC_201:
292 table[0] = 2; table[1] = 0; table[2] = 1;
293 *p_cycle = table[sel];
294 break;
295 case SQ_ALU_VEC_210:
296 table[0] = 2; table[1] = 1; table[2] = 0;
297 *p_cycle = table[sel];
298 break;
299 default:
300 R600_ERR("bad vector bank swizzle value\n");
301 ret = -1;
302 break;
303 }
304 return ret;
305 }
306
307
308
309 static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
310 {
311 int num_src;
312 int i;
313 int channel_swizzle;
314
315 num_src = r600_bc_get_num_operands(alu);
316
317 for (i = 0; i < num_src; i++) {
318 channel_swizzle = alu->src[i].chan;
319 if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
320 chan_counter[channel_swizzle]++;
321 }
322 }
323
324 /* we need something like this I think - but this is bogus */
325 int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
326 {
327 struct r600_bc_alu *alu;
328 int chan_counter[4] = { 0 };
329
330 update_chan_counter(alu_first, chan_counter);
331
332 LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
333 update_chan_counter(alu, chan_counter);
334 }
335
336 if (chan_counter[0] > 3 ||
337 chan_counter[1] > 3 ||
338 chan_counter[2] > 3 ||
339 chan_counter[3] > 3) {
340 R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
341 alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
342 return -1;
343 }
344 return 0;
345 }
346 #endif
347
348 static int is_const(int sel)
349 {
350 if (sel > 255 && sel < 512)
351 return 1;
352 if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
353 return 1;
354 return 0;
355 }
356
357 static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
358 {
359 unsigned swizzle_key;
360
361 if (alu->bank_swizzle_force) {
362 alu->bank_swizzle = alu->bank_swizzle_force;
363 return 0;
364 }
365 swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
366 (is_const(alu->src[1].sel) ? 2 : 0 ) +
367 (is_const(alu->src[2].sel) ? 1 : 0 );
368
369 alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
370 return 0;
371 }
372
373 static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
374 {
375 unsigned swizzle_key;
376
377 if (alu->bank_swizzle_force) {
378 alu->bank_swizzle = alu->bank_swizzle_force;
379 return 0;
380 }
381 swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) +
382 (is_const(alu->src[1].sel) ? 2 : 0 ) +
383 (is_const(alu->src[2].sel) ? 1 : 0 );
384
385 alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
386 return 0;
387 }
388
389 static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
390 {
391 struct r600_bc_alu *alu = NULL;
392 int num_instr = 1;
393
394 init_gpr(alu_first);
395
396 LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
397 num_instr++;
398 }
399
400 if (num_instr == 1) {
401 check_scalar(bc, alu_first);
402
403 } else {
404 /* check_read_slots(bc, bc->cf_last->curr_bs_head);*/
405 check_vector(bc, alu_first);
406 LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
407 check_vector(bc, alu);
408 }
409 }
410 return 0;
411 }
412
413 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
414 {
415 struct r600_bc_alu *nalu = r600_bc_alu();
416 struct r600_bc_alu *lalu;
417 int i, r;
418
419 if (nalu == NULL)
420 return -ENOMEM;
421 memcpy(nalu, alu, sizeof(struct r600_bc_alu));
422 nalu->nliteral = 0;
423
424 /* cf can contains only alu or only vtx or only tex */
425 if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
426 bc->force_add_cf) {
427 r = r600_bc_add_cf(bc);
428 if (r) {
429 free(nalu);
430 return r;
431 }
432 bc->cf_last->inst = (type << 3);
433 }
434 if (!bc->cf_last->curr_bs_head) {
435 bc->cf_last->curr_bs_head = nalu;
436 LIST_INITHEAD(&nalu->bs_list);
437 } else {
438 LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
439 }
440 /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
441 * worst case */
442 if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
443 bc->force_add_cf = 1;
444 }
445 /* number of gpr == the last gpr used in any alu */
446 for (i = 0; i < 3; i++) {
447 if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
448 bc->ngpr = alu->src[i].sel + 1;
449 }
450 /* compute how many literal are needed
451 * either 2 or 4 literals
452 */
453 if (alu->src[i].sel == 253) {
454 if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
455 nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
456 }
457 }
458 }
459 if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
460 lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
461 if (!lalu->last && lalu->nliteral > nalu->nliteral) {
462 nalu->nliteral = lalu->nliteral;
463 }
464 }
465 if (alu->dst.sel >= bc->ngpr) {
466 bc->ngpr = alu->dst.sel + 1;
467 }
468 LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
469 /* each alu use 2 dwords */
470 bc->cf_last->ndw += 2;
471 bc->ndw += 2;
472
473 bc->cf_last->kcache0_mode = 2;
474
475 /* process cur ALU instructions for bank swizzle */
476 if (alu->last) {
477 check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
478 bc->cf_last->curr_bs_head = NULL;
479 }
480 return 0;
481 }
482
483 int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
484 {
485 return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
486 }
487
488 int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
489 {
490 struct r600_bc_alu *alu;
491
492 if (bc->cf_last == NULL) {
493 return 0;
494 }
495 if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
496 return 0;
497 }
498 /* all same on EG */
499 if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
500 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
501 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
502 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
503 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
504 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
505 bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
506 return 0;
507 }
508 /* same on EG */
509 if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
510 (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
511 (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
512 (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
513 LIST_IS_EMPTY(&bc->cf_last->alu)) {
514 R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
515 return -EINVAL;
516 }
517 alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
518 if (!alu->last || !alu->nliteral || alu->literal_added) {
519 return 0;
520 }
521 memcpy(alu->value, value, 4 * 4);
522 bc->cf_last->ndw += alu->nliteral;
523 bc->ndw += alu->nliteral;
524 alu->literal_added = 1;
525 return 0;
526 }
527
528 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
529 {
530 struct r600_bc_vtx *nvtx = r600_bc_vtx();
531 int r;
532
533 if (nvtx == NULL)
534 return -ENOMEM;
535 memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
536
537 /* cf can contains only alu or only vtx or only tex */
538 if (bc->cf_last == NULL ||
539 (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
540 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) ||
541 bc->force_add_cf) {
542 r = r600_bc_add_cf(bc);
543 if (r) {
544 free(nvtx);
545 return r;
546 }
547 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
548 }
549 LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
550 /* each fetch use 4 dwords */
551 bc->cf_last->ndw += 4;
552 bc->ndw += 4;
553 if ((bc->ndw / 4) > 7)
554 bc->force_add_cf = 1;
555 return 0;
556 }
557
558 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
559 {
560 struct r600_bc_tex *ntex = r600_bc_tex();
561 int r;
562
563 if (ntex == NULL)
564 return -ENOMEM;
565 memcpy(ntex, tex, sizeof(struct r600_bc_tex));
566
567 /* cf can contains only alu or only vtx or only tex */
568 if (bc->cf_last == NULL ||
569 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
570 bc->force_add_cf) {
571 r = r600_bc_add_cf(bc);
572 if (r) {
573 free(ntex);
574 return r;
575 }
576 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
577 }
578 LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
579 /* each texture fetch use 4 dwords */
580 bc->cf_last->ndw += 4;
581 bc->ndw += 4;
582 if ((bc->ndw / 4) > 7)
583 bc->force_add_cf = 1;
584 return 0;
585 }
586
587 int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
588 {
589 int r;
590 r = r600_bc_add_cf(bc);
591 if (r)
592 return r;
593
594 bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
595 bc->cf_last->inst = inst;
596 return 0;
597 }
598
599 /* common to all 3 families */
600 static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
601 {
602 unsigned fetch_resource_start = 0;
603
604 /* check if we are fetch shader */
605 /* fetch shader can also access vertex resource,
606 * first fetch shader resource is at 160
607 */
608 if (bc->type == -1) {
609 switch (bc->chiprev) {
610 /* r600 */
611 case CHIPREV_R600:
612 /* r700 */
613 case CHIPREV_R700:
614 fetch_resource_start = 160;
615 break;
616 /* evergreen */
617 case CHIPREV_EVERGREEN:
618 fetch_resource_start = 0;
619 break;
620 default:
621 fprintf(stderr, "%s:%s:%d unknown chiprev %d\n",
622 __FILE__, __func__, __LINE__, bc->chiprev);
623 break;
624 }
625 }
626 bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id + fetch_resource_start) |
627 S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
628 S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) |
629 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
630 bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
631 S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
632 S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
633 S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
634 S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
635 S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
636 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
637 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
638 S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
639 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
640 bc->bytecode[id++] = S_SQ_VTX_WORD2_MEGA_FETCH(1);
641 bc->bytecode[id++] = 0;
642 return 0;
643 }
644
645 /* common to all 3 families */
646 static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id)
647 {
648 bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
649 S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
650 S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
651 S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
652 bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
653 S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
654 S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
655 S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
656 S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
657 S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
658 S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
659 S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
660 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
661 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
662 S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
663 bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
664 S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
665 S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
666 S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
667 S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
668 S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
669 S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
670 S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
671 bc->bytecode[id++] = 0;
672 return 0;
673 }
674
675 /* r600 only, r700/eg bits in r700_asm.c */
676 static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
677 {
678 unsigned i;
679
680 /* don't replace gpr by pv or ps for destination register */
681 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
682 S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
683 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
684 S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
685 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
686 S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
687 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
688 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
689 S_SQ_ALU_WORD0_LAST(alu->last);
690
691 if (alu->is_op3) {
692 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
693 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
694 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
695 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
696 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
697 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
698 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
699 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
700 S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
701 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
702 } else {
703 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
704 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
705 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
706 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
707 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
708 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
709 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
710 S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
711 S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
712 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
713 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
714 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
715 }
716 if (alu->last) {
717 if (alu->nliteral && !alu->literal_added) {
718 R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
719 }
720 for (i = 0; i < alu->nliteral; i++) {
721 bc->bytecode[id++] = alu->value[i];
722 }
723 }
724 return 0;
725 }
726
727 /* common for r600/r700 - eg in eg_asm.c */
728 static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
729 {
730 unsigned id = cf->id;
731
732 switch (cf->inst) {
733 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
734 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
735 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
736 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
737 bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
738 S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
739 S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
740 S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
741
742 bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
743 S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
744 S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
745 S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
746 S_SQ_CF_ALU_WORD1_BARRIER(1) |
747 S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
748 S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
749 break;
750 case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
751 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
752 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
753 bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
754 bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
755 S_SQ_CF_WORD1_BARRIER(1) |
756 S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
757 break;
758 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
759 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
760 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
761 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
762 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
763 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
764 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
765 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
766 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
767 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
768 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
769 S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
770 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
771 break;
772 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
773 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
774 case V_SQ_CF_WORD1_SQ_CF_INST_POP:
775 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
776 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
777 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
778 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
779 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
780 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
781 bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
782 bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
783 S_SQ_CF_WORD1_BARRIER(1) |
784 S_SQ_CF_WORD1_COND(cf->cond) |
785 S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
786
787 break;
788 default:
789 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
790 return -EINVAL;
791 }
792 return 0;
793 }
794
795 int r600_bc_build(struct r600_bc *bc)
796 {
797 struct r600_bc_cf *cf;
798 struct r600_bc_alu *alu;
799 struct r600_bc_vtx *vtx;
800 struct r600_bc_tex *tex;
801 unsigned addr;
802 int r;
803
804 if (bc->callstack[0].max > 0)
805 bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
806 if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
807 bc->nstack = 1;
808 }
809
810 /* first path compute addr of each CF block */
811 /* addr start after all the CF instructions */
812 addr = bc->cf_last->id + 2;
813 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
814 switch (cf->inst) {
815 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
816 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
817 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
818 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
819 break;
820 case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
821 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
822 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
823 /* fetch node need to be 16 bytes aligned*/
824 addr += 3;
825 addr &= 0xFFFFFFFCUL;
826 break;
827 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
828 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
829 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
830 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
831 break;
832 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
833 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
834 case V_SQ_CF_WORD1_SQ_CF_INST_POP:
835 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
836 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
837 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
838 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
839 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
840 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
841 break;
842 default:
843 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
844 return -EINVAL;
845 }
846 cf->addr = addr;
847 addr += cf->ndw;
848 bc->ndw = cf->addr + cf->ndw;
849 }
850 free(bc->bytecode);
851 bc->bytecode = calloc(1, bc->ndw * 4);
852 if (bc->bytecode == NULL)
853 return -ENOMEM;
854 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
855 addr = cf->addr;
856 if (bc->chiprev == CHIPREV_EVERGREEN)
857 r = eg_bc_cf_build(bc, cf);
858 else
859 r = r600_bc_cf_build(bc, cf);
860 if (r)
861 return r;
862 switch (cf->inst) {
863 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
864 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
865 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
866 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
867 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
868 switch(bc->chiprev) {
869 case CHIPREV_R600:
870 r = r600_bc_alu_build(bc, alu, addr);
871 break;
872 case CHIPREV_R700:
873 case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */
874 r = r700_bc_alu_build(bc, alu, addr);
875 break;
876 default:
877 R600_ERR("unknown family %d\n", bc->family);
878 return -EINVAL;
879 }
880 if (r)
881 return r;
882 addr += 2;
883 if (alu->last) {
884 addr += alu->nliteral;
885 }
886 }
887 break;
888 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
889 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
890 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
891 r = r600_bc_vtx_build(bc, vtx, addr);
892 if (r)
893 return r;
894 addr += 4;
895 }
896 break;
897 case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
898 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
899 r = r600_bc_tex_build(bc, tex, addr);
900 if (r)
901 return r;
902 addr += 4;
903 }
904 break;
905 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
906 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
907 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
908 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
909 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
910 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
911 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
912 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
913 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
914 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
915 case V_SQ_CF_WORD1_SQ_CF_INST_POP:
916 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
917 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
918 break;
919 default:
920 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
921 return -EINVAL;
922 }
923 }
924 return 0;
925 }
926
927 void r600_bc_clear(struct r600_bc *bc)
928 {
929 struct r600_bc_cf *cf = NULL, *next_cf;
930
931 free(bc->bytecode);
932 bc->bytecode = NULL;
933
934 LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
935 struct r600_bc_alu *alu = NULL, *next_alu;
936 struct r600_bc_tex *tex = NULL, *next_tex;
937 struct r600_bc_tex *vtx = NULL, *next_vtx;
938
939 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
940 free(alu);
941 }
942
943 LIST_INITHEAD(&cf->alu);
944
945 LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
946 free(tex);
947 }
948
949 LIST_INITHEAD(&cf->tex);
950
951 LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
952 free(vtx);
953 }
954
955 LIST_INITHEAD(&cf->vtx);
956
957 free(cf);
958 }
959
960 LIST_INITHEAD(&cf->list);
961 }
962
963 void r600_bc_dump(struct r600_bc *bc)
964 {
965 struct r600_bc_cf *cf;
966 struct r600_bc_alu *alu;
967 struct r600_bc_vtx *vtx;
968 struct r600_bc_tex *tex;
969
970 unsigned i, id;
971 char chip = '6';
972
973 switch (bc->chiprev) {
974 case 1:
975 chip = '7';
976 break;
977 case 2:
978 chip = 'E';
979 break;
980 case 0:
981 default:
982 chip = '6';
983 break;
984 }
985 fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw);
986 fprintf(stderr, " %c\n", chip);
987
988 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
989 id = cf->id;
990
991 switch (cf->inst) {
992 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
993 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
994 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
995 case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
996 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
997 fprintf(stderr, "ADDR:%d ", cf->addr);
998 fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache0_mode);
999 fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache0_bank);
1000 fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache1_bank);
1001 id++;
1002 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
1003 fprintf(stderr, "INST:%d ", cf->inst);
1004 fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache1_mode);
1005 fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache0_addr);
1006 fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache1_addr);
1007 fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
1008 break;
1009 case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
1010 case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
1011 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
1012 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
1013 fprintf(stderr, "ADDR:%d\n", cf->addr);
1014 id++;
1015 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
1016 fprintf(stderr, "INST:%d ", cf->inst);
1017 fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
1018 break;
1019 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
1020 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
1021 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
1022 fprintf(stderr, "GPR:%X ", cf->output.gpr);
1023 fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
1024 fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
1025 fprintf(stderr, "TYPE:%X\n", cf->output.type);
1026 id++;
1027 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
1028 fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
1029 fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
1030 fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
1031 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
1032 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
1033 fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
1034 fprintf(stderr, "INST:%d ", cf->output.inst);
1035 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
1036 break;
1037 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
1038 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
1039 case V_SQ_CF_WORD1_SQ_CF_INST_POP:
1040 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
1041 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
1042 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
1043 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
1044 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
1045 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
1046 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
1047 fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
1048 id++;
1049 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
1050 fprintf(stderr, "INST:%d ", cf->inst);
1051 fprintf(stderr, "COND:%X ", cf->cond);
1052 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
1053 break;
1054 }
1055
1056 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1057 id = cf->addr;
1058 fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
1059 fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
1060 fprintf(stderr, "REL:%d ", alu->src[0].rel);
1061 fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
1062 fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
1063 fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
1064 fprintf(stderr, "REL:%d ", alu->src[1].rel);
1065 fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
1066 fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
1067 fprintf(stderr, "LAST:%d)\n", alu->last);
1068 id++;
1069 if (alu->is_op3) {
1070 fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
1071 fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
1072 fprintf(stderr, "CHAN:%d ", alu->dst.chan);
1073 fprintf(stderr, "REL:%d ", alu->dst.rel);
1074 fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
1075 fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
1076 fprintf(stderr, "REL:%d ", alu->src[2].rel);
1077 fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
1078 fprintf(stderr, "NEG:%d) ", alu->src[2].neg);
1079 fprintf(stderr, "INST:%d ", alu->inst);
1080 fprintf(stderr, "BANK_SWIZZLE:%d\n", alu->bank_swizzle);
1081 } else {
1082 fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
1083 fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
1084 fprintf(stderr, "CHAN:%d ", alu->dst.chan);
1085 fprintf(stderr, "REL:%d ", alu->dst.rel);
1086 fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
1087 fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
1088 fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
1089 fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
1090 fprintf(stderr, "OMOD:%d ", alu->omod);
1091 fprintf(stderr, "INST:%d ", alu->inst);
1092 fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
1093 fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
1094 fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
1095 }
1096
1097 if (alu->last) {
1098 for (i = 0; i < alu->nliteral; i++) {
1099 float *f = (float*)(bc->bytecode + id);
1100 fprintf(stderr, "%04d %08X %f\n", id, bc->bytecode[id], *f);
1101 }
1102 }
1103 }
1104
1105 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
1106 //TODO
1107 }
1108
1109 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1110 //TODO
1111 }
1112 }
1113
1114 fprintf(stderr, "--------------------------------------\n");
1115 }
1116
1117 void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
1118 {
1119 struct r600_pipe_state *rstate;
1120 unsigned i = 0;
1121
1122 if (count > 8) {
1123 bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1124 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
1125 S_SQ_CF_WORD1_BARRIER(1) |
1126 S_SQ_CF_WORD1_COUNT(8 - 1);
1127 bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
1128 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
1129 S_SQ_CF_WORD1_BARRIER(1) |
1130 S_SQ_CF_WORD1_COUNT(count - 8 - 1);
1131 } else {
1132 bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1133 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
1134 S_SQ_CF_WORD1_BARRIER(1) |
1135 S_SQ_CF_WORD1_COUNT(count - 1);
1136 }
1137 bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
1138 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
1139 S_SQ_CF_WORD1_BARRIER(1);
1140
1141 rstate = &ve->rstate;
1142 rstate->id = R600_PIPE_STATE_FETCH_SHADER;
1143 rstate->nregs = 0;
1144 r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
1145 0x00000000, 0xFFFFFFFF, NULL);
1146 r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
1147 0x00000000, 0xFFFFFFFF, NULL);
1148 r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
1149 r600_bo_offset(ve->fetch_shader) >> 8,
1150 0xFFFFFFFF, ve->fetch_shader);
1151 }
1152
1153 void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
1154 {
1155 struct r600_pipe_state *rstate;
1156 unsigned i = 0;
1157
1158 if (count > 8) {
1159 bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1160 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
1161 S_SQ_CF_WORD1_BARRIER(1) |
1162 S_SQ_CF_WORD1_COUNT(8 - 1);
1163 bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
1164 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
1165 S_SQ_CF_WORD1_BARRIER(1) |
1166 S_SQ_CF_WORD1_COUNT((count - 8) - 1);
1167 } else {
1168 bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
1169 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
1170 S_SQ_CF_WORD1_BARRIER(1) |
1171 S_SQ_CF_WORD1_COUNT(count - 1);
1172 }
1173 bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
1174 bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
1175 S_SQ_CF_WORD1_BARRIER(1);
1176
1177 rstate = &ve->rstate;
1178 rstate->id = R600_PIPE_STATE_FETCH_SHADER;
1179 rstate->nregs = 0;
1180 r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
1181 0x00000000, 0xFFFFFFFF, NULL);
1182 r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
1183 0x00000000, 0xFFFFFFFF, NULL);
1184 r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
1185 r600_bo_offset(ve->fetch_shader) >> 8,
1186 0xFFFFFFFF, ve->fetch_shader);
1187 }
1188
1189 static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
1190 unsigned *num_format, unsigned *format_comp)
1191 {
1192 const struct util_format_description *desc;
1193 unsigned i;
1194
1195 *format = 0;
1196 *num_format = 0;
1197 *format_comp = 0;
1198
1199 desc = util_format_description(pformat);
1200 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
1201 goto out_unknown;
1202 }
1203
1204 /* Find the first non-VOID channel. */
1205 for (i = 0; i < 4; i++) {
1206 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
1207 break;
1208 }
1209 }
1210
1211 switch (desc->channel[i].type) {
1212 /* Half-floats, floats, doubles */
1213 case UTIL_FORMAT_TYPE_FLOAT:
1214 switch (desc->channel[i].size) {
1215 case 16:
1216 switch (desc->nr_channels) {
1217 case 1:
1218 *format = FMT_16_FLOAT;
1219 break;
1220 case 2:
1221 *format = FMT_16_16_FLOAT;
1222 break;
1223 case 3:
1224 *format = FMT_16_16_16_FLOAT;
1225 break;
1226 case 4:
1227 *format = FMT_16_16_16_16_FLOAT;
1228 break;
1229 }
1230 break;
1231 case 32:
1232 switch (desc->nr_channels) {
1233 case 1:
1234 *format = FMT_32_FLOAT;
1235 break;
1236 case 2:
1237 *format = FMT_32_32_FLOAT;
1238 break;
1239 case 3:
1240 *format = FMT_32_32_32_FLOAT;
1241 break;
1242 case 4:
1243 *format = FMT_32_32_32_32_FLOAT;
1244 break;
1245 }
1246 break;
1247 default:
1248 goto out_unknown;
1249 }
1250 break;
1251 /* Unsigned ints */
1252 case UTIL_FORMAT_TYPE_UNSIGNED:
1253 /* Signed ints */
1254 case UTIL_FORMAT_TYPE_SIGNED:
1255 switch (desc->channel[i].size) {
1256 case 8:
1257 switch (desc->nr_channels) {
1258 case 1:
1259 *format = FMT_8;
1260 break;
1261 case 2:
1262 *format = FMT_8_8;
1263 break;
1264 case 3:
1265 // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
1266 // break;
1267 case 4:
1268 *format = FMT_8_8_8_8;
1269 break;
1270 }
1271 break;
1272 case 16:
1273 switch (desc->nr_channels) {
1274 case 1:
1275 *format = FMT_16;
1276 break;
1277 case 2:
1278 *format = FMT_16_16;
1279 break;
1280 case 3:
1281 // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
1282 // break;
1283 case 4:
1284 *format = FMT_16_16_16_16;
1285 break;
1286 }
1287 break;
1288 case 32:
1289 switch (desc->nr_channels) {
1290 case 1:
1291 *format = FMT_32;
1292 break;
1293 case 2:
1294 *format = FMT_32_32;
1295 break;
1296 case 3:
1297 *format = FMT_32_32_32;
1298 break;
1299 case 4:
1300 *format = FMT_32_32_32_32;
1301 break;
1302 }
1303 break;
1304 default:
1305 goto out_unknown;
1306 }
1307 break;
1308 default:
1309 goto out_unknown;
1310 }
1311
1312 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1313 *format_comp = 1;
1314 }
1315 if (desc->channel[i].normalized) {
1316 *num_format = 0;
1317 } else {
1318 *num_format = 2;
1319 }
1320 return;
1321 out_unknown:
1322 R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
1323 }
1324
1325 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
1326 {
1327 unsigned ndw, i;
1328 u32 *bytecode;
1329 unsigned fetch_resource_start = 0, format, num_format, format_comp;
1330 struct pipe_vertex_element *elements = ve->elements;
1331 const struct util_format_description *desc;
1332
1333 /* 2 dwords for cf aligned to 4 + 4 dwords per input */
1334 ndw = 8 + ve->count * 4;
1335 ve->fs_size = ndw * 4;
1336
1337 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
1338 ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
1339 if (ve->fetch_shader == NULL) {
1340 return -ENOMEM;
1341 }
1342
1343 bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
1344 if (bytecode == NULL) {
1345 r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
1346 return -ENOMEM;
1347 }
1348
1349 if (rctx->family >= CHIP_CEDAR) {
1350 eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
1351 } else {
1352 r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
1353 fetch_resource_start = 160;
1354 }
1355
1356 /* vertex elements offset need special handling, if offset is bigger
1357 * than what we can put in fetch instruction then we need to alterate
1358 * the vertex resource offset. In such case in order to simplify code
1359 * we will bound one resource per elements. It's a worst case scenario.
1360 */
1361 for (i = 0; i < ve->count; i++) {
1362 ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
1363 if (ve->vbuffer_offset[i]) {
1364 ve->vbuffer_need_offset = 1;
1365 }
1366 }
1367
1368 for (i = 0; i < ve->count; i++) {
1369 unsigned vbuffer_index;
1370 r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
1371 desc = util_format_description(ve->hw_format[i]);
1372 if (desc == NULL) {
1373 R600_ERR("unknown format %d\n", ve->hw_format[i]);
1374 r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
1375 return -EINVAL;
1376 }
1377
1378 /* see above for vbuffer_need_offset explanation */
1379 vbuffer_index = elements[i].vertex_buffer_index;
1380 if (ve->vbuffer_need_offset) {
1381 bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
1382 } else {
1383 bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
1384 }
1385 bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
1386 S_SQ_VTX_WORD0_SRC_SEL_X(0) |
1387 S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
1388 bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
1389 S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
1390 S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
1391 S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
1392 S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
1393 S_SQ_VTX_WORD1_DATA_FORMAT(format) |
1394 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
1395 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
1396 S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
1397 S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
1398 bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
1399 S_SQ_VTX_WORD2_MEGA_FETCH(1);
1400 bytecode[8 + i * 4 + 3] = 0;
1401 }
1402 r600_bo_unmap(rctx->radeon, ve->fetch_shader);
1403 return 0;
1404 }