/*
 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "freedreno_util.h"
#include "instr-a2xx.h"
/* Logging helpers layered on DBG().  DEBUG_MSG is wrapped in a constant-false
 * condition, so its call is never executed but its arguments are still
 * compiled.  WARN_MSG and ERROR_MSG prepend a severity tag to the format
 * string by literal concatenation, so `f` must be a string literal.
 */
#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
#define WARN_MSG(f, ...)   DBG("WARN: " f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...)  DBG("ERROR: " f, ##__VA_ARGS__)
40 static int cf_emit(struct ir2_cf
*cf
, instr_cf_t
*instr
);
42 static int instr_emit(struct ir2_instruction
*instr
, uint32_t *dwords
,
43 uint32_t idx
, struct ir2_shader_info
*info
);
45 static void reg_update_stats(struct ir2_register
*reg
,
46 struct ir2_shader_info
*info
, bool dest
);
47 static uint32_t reg_fetch_src_swiz(struct ir2_register
*reg
, uint32_t n
);
48 static uint32_t reg_fetch_dst_swiz(struct ir2_register
*reg
);
49 static uint32_t reg_alu_dst_swiz(struct ir2_register
*reg
);
50 static uint32_t reg_alu_src_swiz(struct ir2_register
*reg
);
52 /* simple allocator to carve allocations out of an up-front allocated heap,
53 * so that we can free everything easily in one shot.
55 static void * ir2_alloc(struct ir2_shader
*shader
, int sz
)
57 void *ptr
= &shader
->heap
[shader
->heap_idx
];
58 shader
->heap_idx
+= align(sz
, 4);
/* Duplicate a string into the shader heap.
 *
 * Allocates strlen(str) + 1 bytes via ir2_alloc() and copies the characters.
 * Returns NULL when str is NULL.
 *
 * NOTE(review): the extracted text only kept the strlen/ir2_alloc/memcpy
 * lines.  The declaration of ptr, the NULL guard, the explicit terminator
 * and the return are restored here (memcpy copies len bytes into a len+1
 * allocation, so the terminator has to be written) -- confirm against
 * upstream.
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *ptr = NULL;

	if (str) {
		int len = strlen(str);

		ptr = ir2_alloc(shader, len+1);
		memcpy(ptr, str, len);
		ptr[len] = '\0';
	}

	return ptr;
}
74 struct ir2_shader
* ir2_shader_create(void)
77 return calloc(1, sizeof(struct ir2_shader
));
/* Release a shader obtained from ir2_shader_create().
 *
 * NOTE(review): only the signature survived in the extracted text.  The body
 * is restored as a single free(): ir2_shader_create() makes one calloc() of
 * the whole struct and ir2_alloc() carves everything else out of
 * shader->heap, so there is nothing else visible to release -- confirm
 * against upstream.
 */
void ir2_shader_destroy(struct ir2_shader *shader)
{
	free(shader);
}
86 /* resolve addr/cnt/sequence fields in the individual CF's */
87 static int shader_resolve(struct ir2_shader
*shader
, struct ir2_shader_info
*info
)
93 addr
= shader
->cfs_count
/ 2;
94 for (i
= 0; i
< shader
->cfs_count
; i
++) {
95 struct ir2_cf
*cf
= shader
->cfs
[i
];
96 if ((cf
->cf_type
== EXEC
) || (cf
->cf_type
== EXEC_END
)) {
97 uint32_t sequence
= 0;
99 if (cf
->exec
.addr
&& (cf
->exec
.addr
!= addr
))
100 WARN_MSG("invalid addr '%d' at CF %d", cf
->exec
.addr
, i
);
101 if (cf
->exec
.cnt
&& (cf
->exec
.cnt
!= cf
->exec
.instrs_count
))
102 WARN_MSG("invalid cnt '%d' at CF %d", cf
->exec
.cnt
, i
);
104 for (j
= cf
->exec
.instrs_count
- 1; j
>= 0; j
--) {
105 struct ir2_instruction
*instr
= cf
->exec
.instrs
[j
];
107 if (instr
->instr_type
== IR2_FETCH
)
113 cf
->exec
.addr
= addr
;
114 cf
->exec
.cnt
= cf
->exec
.instrs_count
;
115 cf
->exec
.sequence
= sequence
;
117 addr
+= cf
->exec
.instrs_count
;
121 info
->sizedwords
= 3 * addr
;
126 void * ir2_shader_assemble(struct ir2_shader
*shader
, struct ir2_shader_info
*info
)
129 uint32_t *ptr
, *dwords
= NULL
;
133 info
->sizedwords
= 0;
135 info
->max_input_reg
= 0;
136 info
->regs_written
= 0;
138 /* we need an even # of CF's.. insert a NOP if needed */
139 if (shader
->cfs_count
!= align(shader
->cfs_count
, 2))
140 ir2_cf_create(shader
, NOP
);
142 /* first pass, resolve sizes and addresses: */
143 ret
= shader_resolve(shader
, info
);
145 ERROR_MSG("resolve failed: %d", ret
);
149 ptr
= dwords
= calloc(1, 4 * info
->sizedwords
);
151 /* second pass, emit CF program in pairs: */
152 for (i
= 0; i
< shader
->cfs_count
; i
+= 2) {
153 instr_cf_t
*cfs
= (instr_cf_t
*)ptr
;
154 ret
= cf_emit(shader
->cfs
[i
], &cfs
[0]);
156 ERROR_MSG("CF emit failed: %d\n", ret
);
159 ret
= cf_emit(shader
->cfs
[i
+1], &cfs
[1]);
161 ERROR_MSG("CF emit failed: %d\n", ret
);
165 assert((ptr
- dwords
) <= info
->sizedwords
);
168 /* third pass, emit ALU/FETCH: */
169 for (i
= 0; i
< shader
->cfs_count
; i
++) {
170 struct ir2_cf
*cf
= shader
->cfs
[i
];
171 if ((cf
->cf_type
== EXEC
) || (cf
->cf_type
== EXEC_END
)) {
172 for (j
= 0; j
< cf
->exec
.instrs_count
; j
++) {
173 ret
= instr_emit(cf
->exec
.instrs
[j
], ptr
, idx
++, info
);
175 ERROR_MSG("instruction emit failed: %d", ret
);
179 assert((ptr
- dwords
) <= info
->sizedwords
);
192 struct ir2_cf
* ir2_cf_create(struct ir2_shader
*shader
, instr_cf_opc_t cf_type
)
194 struct ir2_cf
*cf
= ir2_alloc(shader
, sizeof(struct ir2_cf
));
195 DEBUG_MSG("%d", cf_type
);
197 cf
->cf_type
= cf_type
;
198 assert(shader
->cfs_count
< ARRAY_SIZE(shader
->cfs
));
199 shader
->cfs
[shader
->cfs_count
++] = cf
;
208 static int cf_emit(struct ir2_cf
*cf
, instr_cf_t
*instr
)
210 memset(instr
, 0, sizeof(*instr
));
212 instr
->opc
= cf
->cf_type
;
214 switch (cf
->cf_type
) {
219 assert(cf
->exec
.addr
<= 0x1ff);
220 assert(cf
->exec
.cnt
<= 0x6);
221 assert(cf
->exec
.sequence
<= 0xfff);
222 instr
->exec
.address
= cf
->exec
.addr
;
223 instr
->exec
.count
= cf
->exec
.cnt
;
224 instr
->exec
.serialize
= cf
->exec
.sequence
;
227 assert(cf
->alloc
.size
<= 0xf);
228 instr
->alloc
.size
= cf
->alloc
.size
;
229 switch (cf
->alloc
.type
) {
231 case SQ_PARAMETER_PIXEL
:
232 instr
->alloc
.buffer_select
= cf
->alloc
.type
;
235 ERROR_MSG("invalid alloc type: %d", cf
->alloc
.type
);
242 case COND_PRED_EXEC_END
:
248 case COND_EXEC_PRED_CLEAN
:
249 case COND_EXEC_PRED_CLEAN_END
:
250 case MARK_VS_FETCH_DONE
:
259 struct ir2_instruction
* ir2_instr_create(struct ir2_cf
*cf
, int instr_type
)
261 struct ir2_instruction
*instr
=
262 ir2_alloc(cf
->shader
, sizeof(struct ir2_instruction
));
263 DEBUG_MSG("%d", instr_type
);
264 instr
->shader
= cf
->shader
;
265 instr
->pred
= cf
->shader
->pred
;
266 instr
->instr_type
= instr_type
;
267 assert(cf
->exec
.instrs_count
< ARRAY_SIZE(cf
->exec
.instrs
));
268 cf
->exec
.instrs
[cf
->exec
.instrs_count
++] = instr
;
274 * FETCH instructions:
277 static int instr_emit_fetch(struct ir2_instruction
*instr
,
278 uint32_t *dwords
, uint32_t idx
,
279 struct ir2_shader_info
*info
)
281 instr_fetch_t
*fetch
= (instr_fetch_t
*)dwords
;
283 struct ir2_register
*dst_reg
= instr
->regs
[reg
++];
284 struct ir2_register
*src_reg
= instr
->regs
[reg
++];
286 memset(fetch
, 0, sizeof(*fetch
));
288 reg_update_stats(dst_reg
, info
, true);
289 reg_update_stats(src_reg
, info
, false);
291 fetch
->opc
= instr
->fetch
.opc
;
293 if (instr
->fetch
.opc
== VTX_FETCH
) {
294 instr_fetch_vtx_t
*vtx
= &fetch
->vtx
;
296 assert(instr
->fetch
.stride
<= 0xff);
297 assert(instr
->fetch
.fmt
<= 0x3f);
298 assert(instr
->fetch
.const_idx
<= 0x1f);
299 assert(instr
->fetch
.const_idx_sel
<= 0x3);
301 vtx
->src_reg
= src_reg
->num
;
302 vtx
->src_swiz
= reg_fetch_src_swiz(src_reg
, 1);
303 vtx
->dst_reg
= dst_reg
->num
;
304 vtx
->dst_swiz
= reg_fetch_dst_swiz(dst_reg
);
305 vtx
->must_be_one
= 1;
306 vtx
->const_index
= instr
->fetch
.const_idx
;
307 vtx
->const_index_sel
= instr
->fetch
.const_idx_sel
;
308 vtx
->format_comp_all
= !!instr
->fetch
.is_signed
;
309 vtx
->num_format_all
= !instr
->fetch
.is_normalized
;
310 vtx
->format
= instr
->fetch
.fmt
;
311 vtx
->stride
= instr
->fetch
.stride
;
312 vtx
->offset
= instr
->fetch
.offset
;
314 if (instr
->pred
!= IR2_PRED_NONE
) {
315 vtx
->pred_select
= 1;
316 vtx
->pred_condition
= (instr
->pred
== IR2_PRED_EQ
) ? 1 : 0;
319 /* XXX seems like every FETCH but the first has
322 vtx
->reserved3
= (idx
> 0) ? 0x1 : 0x0;
323 vtx
->reserved0
= (idx
> 0) ? 0x2 : 0x3;
324 } else if (instr
->fetch
.opc
== TEX_FETCH
) {
325 instr_fetch_tex_t
*tex
= &fetch
->tex
;
327 assert(instr
->fetch
.const_idx
<= 0x1f);
329 tex
->src_reg
= src_reg
->num
;
330 tex
->src_swiz
= reg_fetch_src_swiz(src_reg
, 3);
331 tex
->dst_reg
= dst_reg
->num
;
332 tex
->dst_swiz
= reg_fetch_dst_swiz(dst_reg
);
333 tex
->const_idx
= instr
->fetch
.const_idx
;
334 tex
->mag_filter
= TEX_FILTER_USE_FETCH_CONST
;
335 tex
->min_filter
= TEX_FILTER_USE_FETCH_CONST
;
336 tex
->mip_filter
= TEX_FILTER_USE_FETCH_CONST
;
337 tex
->aniso_filter
= ANISO_FILTER_USE_FETCH_CONST
;
338 tex
->arbitrary_filter
= ARBITRARY_FILTER_USE_FETCH_CONST
;
339 tex
->vol_mag_filter
= TEX_FILTER_USE_FETCH_CONST
;
340 tex
->vol_min_filter
= TEX_FILTER_USE_FETCH_CONST
;
341 tex
->use_comp_lod
= 1;
342 tex
->use_reg_lod
= !instr
->fetch
.is_cube
;
343 tex
->sample_location
= SAMPLE_CENTER
;
345 if (instr
->pred
!= IR2_PRED_NONE
) {
346 tex
->pred_select
= 1;
347 tex
->pred_condition
= (instr
->pred
== IR2_PRED_EQ
) ? 1 : 0;
351 ERROR_MSG("invalid fetch opc: %d\n", instr
->fetch
.opc
);
362 static int instr_emit_alu(struct ir2_instruction
*instr
, uint32_t *dwords
,
363 struct ir2_shader_info
*info
)
366 instr_alu_t
*alu
= (instr_alu_t
*)dwords
;
367 struct ir2_register
*dst_reg
= instr
->regs
[reg
++];
368 struct ir2_register
*src1_reg
;
369 struct ir2_register
*src2_reg
;
370 struct ir2_register
*src3_reg
;
372 memset(alu
, 0, sizeof(*alu
));
374 /* handle instructions w/ 3 src operands: */
375 switch (instr
->alu
.vector_opc
) {
381 /* note: disassembler lists 3rd src first, ie:
382 * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
383 * which is the reason for this strange ordering.
385 src3_reg
= instr
->regs
[reg
++];
392 src1_reg
= instr
->regs
[reg
++];
393 src2_reg
= instr
->regs
[reg
++];
395 reg_update_stats(dst_reg
, info
, true);
396 reg_update_stats(src1_reg
, info
, false);
397 reg_update_stats(src2_reg
, info
, false);
399 assert((dst_reg
->flags
& ~IR2_REG_EXPORT
) == 0);
400 assert(!dst_reg
->swizzle
|| (strlen(dst_reg
->swizzle
) == 4));
401 assert((src1_reg
->flags
& IR2_REG_EXPORT
) == 0);
402 assert(!src1_reg
->swizzle
|| (strlen(src1_reg
->swizzle
) == 4));
403 assert((src2_reg
->flags
& IR2_REG_EXPORT
) == 0);
404 assert(!src2_reg
->swizzle
|| (strlen(src2_reg
->swizzle
) == 4));
406 if (instr
->alu
.vector_opc
== ~0) {
407 alu
->vector_opc
= MAXv
;
408 alu
->vector_write_mask
= 0;
410 alu
->vector_opc
= instr
->alu
.vector_opc
;
411 alu
->vector_write_mask
= reg_alu_dst_swiz(dst_reg
);
414 alu
->vector_dest
= dst_reg
->num
;
415 alu
->export_data
= !!(dst_reg
->flags
& IR2_REG_EXPORT
);
417 // TODO predicate case/condition.. need to add to parser
419 alu
->src2_reg
= src2_reg
->num
;
420 alu
->src2_swiz
= reg_alu_src_swiz(src2_reg
);
421 alu
->src2_reg_negate
= !!(src2_reg
->flags
& IR2_REG_NEGATE
);
422 alu
->src2_reg_abs
= !!(src2_reg
->flags
& IR2_REG_ABS
);
423 alu
->src2_sel
= !(src2_reg
->flags
& IR2_REG_CONST
);
425 alu
->src1_reg
= src1_reg
->num
;
426 alu
->src1_swiz
= reg_alu_src_swiz(src1_reg
);
427 alu
->src1_reg_negate
= !!(src1_reg
->flags
& IR2_REG_NEGATE
);
428 alu
->src1_reg_abs
= !!(src1_reg
->flags
& IR2_REG_ABS
);
429 alu
->src1_sel
= !(src1_reg
->flags
& IR2_REG_CONST
);
431 alu
->vector_clamp
= instr
->alu
.vector_clamp
;
432 alu
->scalar_clamp
= instr
->alu
.scalar_clamp
;
434 if (instr
->alu
.scalar_opc
!= ~0) {
435 struct ir2_register
*sdst_reg
= instr
->regs
[reg
++];
437 reg_update_stats(sdst_reg
, info
, true);
439 assert(sdst_reg
->flags
== dst_reg
->flags
);
442 assert(src3_reg
== instr
->regs
[reg
++]);
444 src3_reg
= instr
->regs
[reg
++];
447 alu
->scalar_dest
= sdst_reg
->num
;
448 alu
->scalar_write_mask
= reg_alu_dst_swiz(sdst_reg
);
449 alu
->scalar_opc
= instr
->alu
.scalar_opc
;
451 /* not sure if this is required, but adreno compiler seems
452 * to always set scalar opc to MAXs if it is not used:
454 alu
->scalar_opc
= MAXs
;
458 reg_update_stats(src3_reg
, info
, false);
460 alu
->src3_reg
= src3_reg
->num
;
461 alu
->src3_swiz
= reg_alu_src_swiz(src3_reg
);
462 alu
->src3_reg_negate
= !!(src3_reg
->flags
& IR2_REG_NEGATE
);
463 alu
->src3_reg_abs
= !!(src3_reg
->flags
& IR2_REG_ABS
);
464 alu
->src3_sel
= !(src3_reg
->flags
& IR2_REG_CONST
);
466 /* not sure if this is required, but adreno compiler seems
467 * to always set register bank for 3rd src if unused:
472 if (instr
->pred
!= IR2_PRED_NONE
) {
473 alu
->pred_select
= (instr
->pred
== IR2_PRED_EQ
) ? 3 : 2;
479 static int instr_emit(struct ir2_instruction
*instr
, uint32_t *dwords
,
480 uint32_t idx
, struct ir2_shader_info
*info
)
482 switch (instr
->instr_type
) {
483 case IR2_FETCH
: return instr_emit_fetch(instr
, dwords
, idx
, info
);
484 case IR2_ALU
: return instr_emit_alu(instr
, dwords
, info
);
490 struct ir2_register
* ir2_reg_create(struct ir2_instruction
*instr
,
491 int num
, const char *swizzle
, int flags
)
493 struct ir2_register
*reg
=
494 ir2_alloc(instr
->shader
, sizeof(struct ir2_register
));
495 DEBUG_MSG("%x, %d, %s", flags
, num
, swizzle
);
496 assert(num
<= REG_MASK
);
499 reg
->swizzle
= ir2_strdup(instr
->shader
, swizzle
);
500 assert(instr
->regs_count
< ARRAY_SIZE(instr
->regs
));
501 instr
->regs
[instr
->regs_count
++] = reg
;
505 static void reg_update_stats(struct ir2_register
*reg
,
506 struct ir2_shader_info
*info
, bool dest
)
508 if (!(reg
->flags
& (IR2_REG_CONST
|IR2_REG_EXPORT
))) {
509 info
->max_reg
= MAX2(info
->max_reg
, reg
->num
);
512 info
->regs_written
|= (1 << reg
->num
);
513 } else if (!(info
->regs_written
& (1 << reg
->num
))) {
514 /* for registers that haven't been written, they must be an
515 * input register that the thread scheduler (presumably?)
516 * needs to know about:
518 info
->max_input_reg
= MAX2(info
->max_input_reg
, reg
->num
);
523 static uint32_t reg_fetch_src_swiz(struct ir2_register
*reg
, uint32_t n
)
528 assert(reg
->flags
== 0);
529 assert(reg
->swizzle
);
531 DEBUG_MSG("fetch src R%d.%s", reg
->num
, reg
->swizzle
);
533 for (i
= n
-1; i
>= 0; i
--) {
535 switch (reg
->swizzle
[i
]) {
537 ERROR_MSG("invalid fetch src swizzle: %s", reg
->swizzle
);
538 case 'x': swiz
|= 0x0; break;
539 case 'y': swiz
|= 0x1; break;
540 case 'z': swiz
|= 0x2; break;
541 case 'w': swiz
|= 0x3; break;
548 static uint32_t reg_fetch_dst_swiz(struct ir2_register
*reg
)
553 assert(reg
->flags
== 0);
554 assert(!reg
->swizzle
|| (strlen(reg
->swizzle
) == 4));
556 DEBUG_MSG("fetch dst R%d.%s", reg
->num
, reg
->swizzle
);
559 for (i
= 3; i
>= 0; i
--) {
561 switch (reg
->swizzle
[i
]) {
563 ERROR_MSG("invalid dst swizzle: %s", reg
->swizzle
);
564 case 'x': swiz
|= 0x0; break;
565 case 'y': swiz
|= 0x1; break;
566 case 'z': swiz
|= 0x2; break;
567 case 'w': swiz
|= 0x3; break;
568 case '0': swiz
|= 0x4; break;
569 case '1': swiz
|= 0x5; break;
570 case '_': swiz
|= 0x7; break;
580 /* actually, a write-mask */
581 static uint32_t reg_alu_dst_swiz(struct ir2_register
*reg
)
586 assert((reg
->flags
& ~IR2_REG_EXPORT
) == 0);
587 assert(!reg
->swizzle
|| (strlen(reg
->swizzle
) == 4));
589 DEBUG_MSG("alu dst R%d.%s", reg
->num
, reg
->swizzle
);
592 for (i
= 3; i
>= 0; i
--) {
594 if (reg
->swizzle
[i
] == "xyzw"[i
]) {
596 } else if (reg
->swizzle
[i
] != '_') {
597 ERROR_MSG("invalid dst swizzle: %s", reg
->swizzle
);
608 static uint32_t reg_alu_src_swiz(struct ir2_register
*reg
)
613 assert((reg
->flags
& IR2_REG_EXPORT
) == 0);
614 assert(!reg
->swizzle
|| (strlen(reg
->swizzle
) == 4));
616 DEBUG_MSG("vector src R%d.%s", reg
->num
, reg
->swizzle
);
619 for (i
= 3; i
>= 0; i
--) {
621 switch (reg
->swizzle
[i
]) {
623 ERROR_MSG("invalid vector src swizzle: %s", reg
->swizzle
);
624 case 'x': swiz
|= (0x0 - i
) & 0x3; break;
625 case 'y': swiz
|= (0x1 - i
) & 0x3; break;
626 case 'z': swiz
|= (0x2 - i
) & 0x3; break;
627 case 'w': swiz
|= (0x3 - i
) & 0x3; break;