freedreno: prepare for a3xx
[mesa.git] / src / gallium / drivers / freedreno / a2xx / ir-a2xx.c
1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "ir-a2xx.h"
25
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include "freedreno_util.h"
32 #include "instr-a2xx.h"
33
/* Debug tracing: the DBG() invocation sits in a dead branch, so it is
 * still seen by the compiler but never executed.
 */
#define DEBUG_MSG(f, ...) \
	do { \
		if (0) \
			DBG(f, ##__VA_ARGS__); \
	} while (0)

/* Warnings and errors go through DBG() with a fixed prefix glued onto
 * the (string literal) format.
 */
#define WARN_MSG(f, ...)   DBG("WARN: "f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)

/* Upper bound for register numbers, checked in ir2_reg_create(). */
#define REG_MASK 0x3f
39
40 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);
41
42 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
43 uint32_t idx, struct ir2_shader_info *info);
44
45 static void reg_update_stats(struct ir2_register *reg,
46 struct ir2_shader_info *info, bool dest);
47 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
48 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
49 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
50 static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
51
52 /* simple allocator to carve allocations out of an up-front allocated heap,
53 * so that we can free everything easily in one shot.
54 */
55 static void * ir2_alloc(struct ir2_shader *shader, int sz)
56 {
57 void *ptr = &shader->heap[shader->heap_idx];
58 shader->heap_idx += align(sz, 4);
59 return ptr;
60 }
61
/* Copy a NUL-terminated string into the shader heap.  A NULL input is
 * passed through as NULL without allocating anything.
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *copy;
	int len;

	if (!str)
		return NULL;

	len = strlen(str);
	copy = ir2_alloc(shader, len + 1);
	memcpy(copy, str, len);
	copy[len] = '\0';

	return copy;
}
73
74 struct ir2_shader * ir2_shader_create(void)
75 {
76 DEBUG_MSG("");
77 return calloc(1, sizeof(struct ir2_shader));
78 }
79
/* Release a shader obtained from ir2_shader_create().  Everything that
 * was carved out of the shader's heap goes away with this single free().
 */
void ir2_shader_destroy(struct ir2_shader *shader)
{
	DEBUG_MSG("");

	free(shader);
}
85
86 /* resolve addr/cnt/sequence fields in the individual CF's */
87 static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
88 {
89 uint32_t addr;
90 unsigned i;
91 int j;
92
93 addr = shader->cfs_count / 2;
94 for (i = 0; i < shader->cfs_count; i++) {
95 struct ir2_cf *cf = shader->cfs[i];
96 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
97 uint32_t sequence = 0;
98
99 if (cf->exec.addr && (cf->exec.addr != addr))
100 WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
101 if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
102 WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
103
104 for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
105 struct ir2_instruction *instr = cf->exec.instrs[j];
106 sequence <<= 2;
107 if (instr->instr_type == IR2_FETCH)
108 sequence |= 0x1;
109 if (instr->sync)
110 sequence |= 0x2;
111 }
112
113 cf->exec.addr = addr;
114 cf->exec.cnt = cf->exec.instrs_count;
115 cf->exec.sequence = sequence;
116
117 addr += cf->exec.instrs_count;
118 }
119 }
120
121 info->sizedwords = 3 * addr;
122
123 return 0;
124 }
125
126 void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
127 {
128 uint32_t i, j;
129 uint32_t *ptr, *dwords = NULL;
130 uint32_t idx = 0;
131 int ret;
132
133 info->sizedwords = 0;
134 info->max_reg = -1;
135 info->max_input_reg = 0;
136 info->regs_written = 0;
137
138 /* we need an even # of CF's.. insert a NOP if needed */
139 if (shader->cfs_count != align(shader->cfs_count, 2))
140 ir2_cf_create(shader, NOP);
141
142 /* first pass, resolve sizes and addresses: */
143 ret = shader_resolve(shader, info);
144 if (ret) {
145 ERROR_MSG("resolve failed: %d", ret);
146 goto fail;
147 }
148
149 ptr = dwords = calloc(1, 4 * info->sizedwords);
150
151 /* second pass, emit CF program in pairs: */
152 for (i = 0; i < shader->cfs_count; i += 2) {
153 instr_cf_t *cfs = (instr_cf_t *)ptr;
154 ret = cf_emit(shader->cfs[i], &cfs[0]);
155 if (ret) {
156 ERROR_MSG("CF emit failed: %d\n", ret);
157 goto fail;
158 }
159 ret = cf_emit(shader->cfs[i+1], &cfs[1]);
160 if (ret) {
161 ERROR_MSG("CF emit failed: %d\n", ret);
162 goto fail;
163 }
164 ptr += 3;
165 assert((ptr - dwords) <= info->sizedwords);
166 }
167
168 /* third pass, emit ALU/FETCH: */
169 for (i = 0; i < shader->cfs_count; i++) {
170 struct ir2_cf *cf = shader->cfs[i];
171 if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
172 for (j = 0; j < cf->exec.instrs_count; j++) {
173 ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
174 if (ret) {
175 ERROR_MSG("instruction emit failed: %d", ret);
176 goto fail;
177 }
178 ptr += 3;
179 assert((ptr - dwords) <= info->sizedwords);
180 }
181 }
182 }
183
184 return dwords;
185
186 fail:
187 free(dwords);
188 return NULL;
189 }
190
191
192 struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
193 {
194 struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf));
195 DEBUG_MSG("%d", cf_type);
196 cf->shader = shader;
197 cf->cf_type = cf_type;
198 assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
199 shader->cfs[shader->cfs_count++] = cf;
200 return cf;
201 }
202
203
204 /*
205 * CF instructions:
206 */
207
208 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
209 {
210 memset(instr, 0, sizeof(*instr));
211
212 instr->opc = cf->cf_type;
213
214 switch (cf->cf_type) {
215 case NOP:
216 break;
217 case EXEC:
218 case EXEC_END:
219 assert(cf->exec.addr <= 0x1ff);
220 assert(cf->exec.cnt <= 0x6);
221 assert(cf->exec.sequence <= 0xfff);
222 instr->exec.address = cf->exec.addr;
223 instr->exec.count = cf->exec.cnt;
224 instr->exec.serialize = cf->exec.sequence;
225 break;
226 case ALLOC:
227 assert(cf->alloc.size <= 0xf);
228 instr->alloc.size = cf->alloc.size;
229 switch (cf->alloc.type) {
230 case SQ_POSITION:
231 case SQ_PARAMETER_PIXEL:
232 instr->alloc.buffer_select = cf->alloc.type;
233 break;
234 default:
235 ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
236 return -1;
237 }
238 break;
239 case COND_EXEC:
240 case COND_EXEC_END:
241 case COND_PRED_EXEC:
242 case COND_PRED_EXEC_END:
243 case LOOP_START:
244 case LOOP_END:
245 case COND_CALL:
246 case RETURN:
247 case COND_JMP:
248 case COND_EXEC_PRED_CLEAN:
249 case COND_EXEC_PRED_CLEAN_END:
250 case MARK_VS_FETCH_DONE:
251 ERROR_MSG("TODO");
252 return -1;
253 }
254
255 return 0;
256 }
257
258
259 struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
260 {
261 struct ir2_instruction *instr =
262 ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
263 DEBUG_MSG("%d", instr_type);
264 instr->shader = cf->shader;
265 instr->pred = cf->shader->pred;
266 instr->instr_type = instr_type;
267 assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
268 cf->exec.instrs[cf->exec.instrs_count++] = instr;
269 return instr;
270 }
271
272
273 /*
274 * FETCH instructions:
275 */
276
277 static int instr_emit_fetch(struct ir2_instruction *instr,
278 uint32_t *dwords, uint32_t idx,
279 struct ir2_shader_info *info)
280 {
281 instr_fetch_t *fetch = (instr_fetch_t *)dwords;
282 int reg = 0;
283 struct ir2_register *dst_reg = instr->regs[reg++];
284 struct ir2_register *src_reg = instr->regs[reg++];
285
286 memset(fetch, 0, sizeof(*fetch));
287
288 reg_update_stats(dst_reg, info, true);
289 reg_update_stats(src_reg, info, false);
290
291 fetch->opc = instr->fetch.opc;
292
293 if (instr->fetch.opc == VTX_FETCH) {
294 instr_fetch_vtx_t *vtx = &fetch->vtx;
295
296 assert(instr->fetch.stride <= 0xff);
297 assert(instr->fetch.fmt <= 0x3f);
298 assert(instr->fetch.const_idx <= 0x1f);
299 assert(instr->fetch.const_idx_sel <= 0x3);
300
301 vtx->src_reg = src_reg->num;
302 vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
303 vtx->dst_reg = dst_reg->num;
304 vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
305 vtx->must_be_one = 1;
306 vtx->const_index = instr->fetch.const_idx;
307 vtx->const_index_sel = instr->fetch.const_idx_sel;
308 vtx->format_comp_all = !!instr->fetch.is_signed;
309 vtx->num_format_all = !instr->fetch.is_normalized;
310 vtx->format = instr->fetch.fmt;
311 vtx->stride = instr->fetch.stride;
312 vtx->offset = instr->fetch.offset;
313
314 if (instr->pred != IR2_PRED_NONE) {
315 vtx->pred_select = 1;
316 vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
317 }
318
319 /* XXX seems like every FETCH but the first has
320 * this bit set:
321 */
322 vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
323 vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
324 } else if (instr->fetch.opc == TEX_FETCH) {
325 instr_fetch_tex_t *tex = &fetch->tex;
326
327 assert(instr->fetch.const_idx <= 0x1f);
328
329 tex->src_reg = src_reg->num;
330 tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
331 tex->dst_reg = dst_reg->num;
332 tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
333 tex->const_idx = instr->fetch.const_idx;
334 tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
335 tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
336 tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
337 tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
338 tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
339 tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
340 tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
341 tex->use_comp_lod = 1;
342 tex->use_reg_lod = !instr->fetch.is_cube;
343 tex->sample_location = SAMPLE_CENTER;
344
345 if (instr->pred != IR2_PRED_NONE) {
346 tex->pred_select = 1;
347 tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
348 }
349
350 } else {
351 ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
352 return -1;
353 }
354
355 return 0;
356 }
357
358 /*
359 * ALU instructions:
360 */
361
362 static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
363 struct ir2_shader_info *info)
364 {
365 int reg = 0;
366 instr_alu_t *alu = (instr_alu_t *)dwords;
367 struct ir2_register *dst_reg = instr->regs[reg++];
368 struct ir2_register *src1_reg;
369 struct ir2_register *src2_reg;
370 struct ir2_register *src3_reg;
371
372 memset(alu, 0, sizeof(*alu));
373
374 /* handle instructions w/ 3 src operands: */
375 switch (instr->alu.vector_opc) {
376 case MULADDv:
377 case CNDEv:
378 case CNDGTEv:
379 case CNDGTv:
380 case DOT2ADDv:
381 /* note: disassembler lists 3rd src first, ie:
382 * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
383 * which is the reason for this strange ordering.
384 */
385 src3_reg = instr->regs[reg++];
386 break;
387 default:
388 src3_reg = NULL;
389 break;
390 }
391
392 src1_reg = instr->regs[reg++];
393 src2_reg = instr->regs[reg++];
394
395 reg_update_stats(dst_reg, info, true);
396 reg_update_stats(src1_reg, info, false);
397 reg_update_stats(src2_reg, info, false);
398
399 assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
400 assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
401 assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
402 assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
403 assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
404 assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
405
406 if (instr->alu.vector_opc == ~0) {
407 alu->vector_opc = MAXv;
408 alu->vector_write_mask = 0;
409 } else {
410 alu->vector_opc = instr->alu.vector_opc;
411 alu->vector_write_mask = reg_alu_dst_swiz(dst_reg);
412 }
413
414 alu->vector_dest = dst_reg->num;
415 alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT);
416
417 // TODO predicate case/condition.. need to add to parser
418
419 alu->src2_reg = src2_reg->num;
420 alu->src2_swiz = reg_alu_src_swiz(src2_reg);
421 alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE);
422 alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS);
423 alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST);
424
425 alu->src1_reg = src1_reg->num;
426 alu->src1_swiz = reg_alu_src_swiz(src1_reg);
427 alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE);
428 alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS);
429 alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST);
430
431 alu->vector_clamp = instr->alu.vector_clamp;
432 alu->scalar_clamp = instr->alu.scalar_clamp;
433
434 if (instr->alu.scalar_opc != ~0) {
435 struct ir2_register *sdst_reg = instr->regs[reg++];
436
437 reg_update_stats(sdst_reg, info, true);
438
439 assert(sdst_reg->flags == dst_reg->flags);
440
441 if (src3_reg) {
442 assert(src3_reg == instr->regs[reg++]);
443 } else {
444 src3_reg = instr->regs[reg++];
445 }
446
447 alu->scalar_dest = sdst_reg->num;
448 alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
449 alu->scalar_opc = instr->alu.scalar_opc;
450 } else {
451 /* not sure if this is required, but adreno compiler seems
452 * to always set scalar opc to MAXs if it is not used:
453 */
454 alu->scalar_opc = MAXs;
455 }
456
457 if (src3_reg) {
458 reg_update_stats(src3_reg, info, false);
459
460 alu->src3_reg = src3_reg->num;
461 alu->src3_swiz = reg_alu_src_swiz(src3_reg);
462 alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE);
463 alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS);
464 alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST);
465 } else {
466 /* not sure if this is required, but adreno compiler seems
467 * to always set register bank for 3rd src if unused:
468 */
469 alu->src3_sel = 1;
470 }
471
472 if (instr->pred != IR2_PRED_NONE) {
473 alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
474 }
475
476 return 0;
477 }
478
479 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
480 uint32_t idx, struct ir2_shader_info *info)
481 {
482 switch (instr->instr_type) {
483 case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
484 case IR2_ALU: return instr_emit_alu(instr, dwords, info);
485 }
486 return -1;
487 }
488
489
490 struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
491 int num, const char *swizzle, int flags)
492 {
493 struct ir2_register *reg =
494 ir2_alloc(instr->shader, sizeof(struct ir2_register));
495 DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
496 assert(num <= REG_MASK);
497 reg->flags = flags;
498 reg->num = num;
499 reg->swizzle = ir2_strdup(instr->shader, swizzle);
500 assert(instr->regs_count < ARRAY_SIZE(instr->regs));
501 instr->regs[instr->regs_count++] = reg;
502 return reg;
503 }
504
505 static void reg_update_stats(struct ir2_register *reg,
506 struct ir2_shader_info *info, bool dest)
507 {
508 if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
509 info->max_reg = MAX2(info->max_reg, reg->num);
510
511 if (dest) {
512 info->regs_written |= (1 << reg->num);
513 } else if (!(info->regs_written & (1 << reg->num))) {
514 /* for registers that haven't been written, they must be an
515 * input register that the thread scheduler (presumably?)
516 * needs to know about:
517 */
518 info->max_input_reg = MAX2(info->max_input_reg, reg->num);
519 }
520 }
521 }
522
523 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
524 {
525 uint32_t swiz = 0;
526 int i;
527
528 assert(reg->flags == 0);
529 assert(reg->swizzle);
530
531 DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
532
533 for (i = n-1; i >= 0; i--) {
534 swiz <<= 2;
535 switch (reg->swizzle[i]) {
536 default:
537 ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
538 case 'x': swiz |= 0x0; break;
539 case 'y': swiz |= 0x1; break;
540 case 'z': swiz |= 0x2; break;
541 case 'w': swiz |= 0x3; break;
542 }
543 }
544
545 return swiz;
546 }
547
548 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
549 {
550 uint32_t swiz = 0;
551 int i;
552
553 assert(reg->flags == 0);
554 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
555
556 DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
557
558 if (reg->swizzle) {
559 for (i = 3; i >= 0; i--) {
560 swiz <<= 3;
561 switch (reg->swizzle[i]) {
562 default:
563 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
564 case 'x': swiz |= 0x0; break;
565 case 'y': swiz |= 0x1; break;
566 case 'z': swiz |= 0x2; break;
567 case 'w': swiz |= 0x3; break;
568 case '0': swiz |= 0x4; break;
569 case '1': swiz |= 0x5; break;
570 case '_': swiz |= 0x7; break;
571 }
572 }
573 } else {
574 swiz = 0x688;
575 }
576
577 return swiz;
578 }
579
580 /* actually, a write-mask */
581 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
582 {
583 uint32_t swiz = 0;
584 int i;
585
586 assert((reg->flags & ~IR2_REG_EXPORT) == 0);
587 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
588
589 DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
590
591 if (reg->swizzle) {
592 for (i = 3; i >= 0; i--) {
593 swiz <<= 1;
594 if (reg->swizzle[i] == "xyzw"[i]) {
595 swiz |= 0x1;
596 } else if (reg->swizzle[i] != '_') {
597 ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
598 break;
599 }
600 }
601 } else {
602 swiz = 0xf;
603 }
604
605 return swiz;
606 }
607
608 static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
609 {
610 uint32_t swiz = 0;
611 int i;
612
613 assert((reg->flags & IR2_REG_EXPORT) == 0);
614 assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
615
616 DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
617
618 if (reg->swizzle) {
619 for (i = 3; i >= 0; i--) {
620 swiz <<= 2;
621 switch (reg->swizzle[i]) {
622 default:
623 ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
624 case 'x': swiz |= (0x0 - i) & 0x3; break;
625 case 'y': swiz |= (0x1 - i) & 0x3; break;
626 case 'z': swiz |= (0x2 - i) & 0x3; break;
627 case 'w': swiz |= (0x3 - i) & 0x3; break;
628 }
629 }
630 } else {
631 swiz = 0x0;
632 }
633
634 return swiz;
635 }