freedreno/a3xx/compiler: prepare for new compiler
[mesa.git] / src / gallium / drivers / freedreno / a3xx / fd3_compiler.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
#include <stdarg.h>
#include <stdint.h>

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"

#include "fd3_compiler.h"
#include "fd3_program.h"
#include "fd3_util.h"

#include "instr-a3xx.h"
#include "ir3.h"
48
49
struct fd3_compile_context {
	const struct tgsi_token *tokens;
	struct ir3_shader *ir;
	struct ir3_block *block;
	struct fd3_shader_stateobj *so;

	struct tgsi_parse_context parser;
	unsigned type;

	struct tgsi_shader_info info;

	/* last input dst (for setting (ei) flag): */
	struct ir3_register *last_input;

	/* last instruction with relative addressing: */
	struct ir3_instruction *last_rel;

	/* for calculating input/output positions/linkages: */
	unsigned next_inloc;

	/* scratch temporaries handed out by get_internal_temp*() for the
	 * instruction sequence generated from a single TGSI op:
	 */
	unsigned num_internal_temps;
	struct tgsi_src_register internal_temps[6];

	/* track registers which need to synchronize w/ "complex alu" cat3
	 * instruction pipeline:
	 */
	regmask_t needs_ss;

	/* track registers which need to synchronize with texture fetch
	 * pipeline:
	 */
	regmask_t needs_sy;

	/* inputs start at r0, temporaries start after last input, and
	 * outputs start after last temporary.
	 *
	 * We could be more clever, because this is not a hw restriction,
	 * but probably best just to implement an optimizing pass to
	 * reduce the # of registers used and get rid of redundant mov's
	 * (to output register).
	 */
	unsigned base_reg[TGSI_FILE_COUNT];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	/* stack of (potentially nested) pending branch instructions, so
	 * that we can fix up the branch target when we encounter the
	 * corresponding ELSE/ENDIF:
	 */
	struct ir3_instruction *branch[16];
	unsigned int branch_count;

	/* used when dst is same as one of the src, to avoid overwriting a
	 * src element before the remaining scalar instructions that make
	 * up the vector operation
	 */
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
};
111
112
113 static void vectorize(struct fd3_compile_context *ctx,
114 struct ir3_instruction *instr, struct tgsi_dst_register *dst,
115 int nsrcs, ...);
116 static void create_mov(struct fd3_compile_context *ctx,
117 struct tgsi_dst_register *dst, struct tgsi_src_register *src);
118
/* Per-shader setup: wire up the compile context, scan the TGSI for
 * register-file usage, lay out the const/immediate and GPR register
 * files, and start the token parser.  Returns TGSI_PARSE_OK on
 * success, or the tgsi_parse_init() error code.
 */
static unsigned
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
		const struct tgsi_token *tokens)
{
	unsigned ret, base = 0;
	struct tgsi_shader_info *info = &ctx->info;

	ctx->tokens = tokens;
	ctx->ir = so->ir;
	ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
	ctx->so = so;
	ctx->last_input = NULL;
	ctx->last_rel = NULL;
	/* NOTE(review): first 8 inlocs presumably reserved -- confirm
	 * against the linkage code that consumes next_inloc:
	 */
	ctx->next_inloc = 8;
	ctx->num_internal_temps = 0;
	ctx->branch_count = 0;

	regmask_init(&ctx->needs_ss);
	regmask_init(&ctx->needs_sy);
	memset(ctx->base_reg, 0, sizeof(ctx->base_reg));

	tgsi_scan_shader(tokens, &ctx->info);

	/* Immediates go after constants: */
	ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
			info->file_max[TGSI_FILE_CONSTANT] + 1;

	/* if full precision and fragment shader, don't clobber
	 * r0.x w/ bary fetch:
	 */
	if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
		base = 1;

	/* Temporaries after outputs after inputs: */
	ctx->base_reg[TGSI_FILE_INPUT] = base;
	ctx->base_reg[TGSI_FILE_OUTPUT] = base +
			info->file_max[TGSI_FILE_INPUT] + 1;
	ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
			info->file_max[TGSI_FILE_INPUT] + 1 +
			info->file_max[TGSI_FILE_OUTPUT] + 1;

	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
	/* immediate_idx counts scalar slots (4 per vec4 immediate): */
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);

	ret = tgsi_parse_init(&ctx->parser, tokens);
	if (ret != TGSI_PARSE_OK)
		return ret;

	ctx->type = ctx->parser.FullHeader.Processor.Processor;

	return ret;
}
172
/* Report a compiler error: print the message, dump the offending TGSI
 * for diagnosis, and assert (halts in debug builds; callers do not
 * attempt recovery).
 */
static void
compile_error(struct fd3_compile_context *ctx, const char *format, ...)
{
	va_list ap;
	va_start(ap, format);
	_debug_vprintf(format, ap);
	va_end(ap);
	tgsi_dump(ctx->tokens, 0);
	debug_assert(0);
}

/* assert-style check routed through compile_error() so the TGSI gets
 * dumped when it fails:
 */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
	} while (0)
187
/* Release parser state owned by the compile context. */
static void
compile_free(struct fd3_compile_context *ctx)
{
	tgsi_parse_free(&ctx->parser);
}
193
/* Table entry mapping one TGSI opcode to its translation handler,
 * plus the native opcode(s) / extra argument the handler uses:
 */
struct instr_translater {
	void (*fxn)(const struct instr_translater *t,
			struct fd3_compile_context *ctx,
			struct tgsi_full_instruction *inst);
	unsigned tgsi_opc;
	opc_t opc;
	opc_t hopc;    /* opc to use for half_precision mode, if different */
	unsigned arg;
};
203
204 static void
205 handle_last_rel(struct fd3_compile_context *ctx)
206 {
207 if (ctx->last_rel) {
208 ctx->last_rel->flags |= IR3_INSTR_UL;
209 ctx->last_rel = NULL;
210 }
211 }
212
213 static struct ir3_instruction *
214 instr_create(struct fd3_compile_context *ctx, int category, opc_t opc)
215 {
216 return ir3_instr_create(ctx->block, category, opc);
217 }
218
219 static void
220 add_nop(struct fd3_compile_context *ctx, unsigned count)
221 {
222 while (count-- > 0)
223 instr_create(ctx, 0, OPC_NOP);
224 }
225
/* Compute the sync flags a consuming instruction needs before reading
 * 'reg': (ss) if the reg was written by an outstanding "complex alu"
 * cat3 op, (sy) if written by an outstanding texture fetch.  Const and
 * immediate srcs never need sync.
 */
static unsigned
src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
{
	unsigned flags = 0;

	if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
		return flags;

	if (regmask_get(&ctx->needs_ss, reg)) {
		flags |= IR3_INSTR_SS;
		/* one (ss) covers all outstanding writes, so clear the mask: */
		regmask_init(&ctx->needs_ss);
	}

	if (regmask_get(&ctx->needs_sy, reg)) {
		flags |= IR3_INSTR_SY;
		/* one (sy) covers all outstanding writes, so clear the mask: */
		regmask_init(&ctx->needs_sy);
	}

	return flags;
}
246
/* Add the dst register (component 'chan') to 'instr', mapping the TGSI
 * file/index to an ir3 register number via the per-file base offsets
 * (TGSI_FILE_ADDRESS maps to a0).  If the dst uses indirect addressing
 * the instruction is tracked as last_rel for later (ul) flagging.
 */
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_dst_register *dst, unsigned chan)
{
	unsigned flags = 0, num = 0;
	struct ir3_register *reg;

	switch (dst->File) {
	case TGSI_FILE_OUTPUT:
	case TGSI_FILE_TEMPORARY:
		num = dst->Index + ctx->base_reg[dst->File];
		break;
	case TGSI_FILE_ADDRESS:
		num = REG_A0;
		break;
	default:
		compile_error(ctx, "unsupported dst register file: %s\n",
			tgsi_file_name(dst->File));
		break;
	}

	if (dst->Indirect)
		flags |= IR3_REG_RELATIV;
	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	reg = ir3_reg_create(instr, regid(num, chan), flags);

	if (dst->Indirect)
		ctx->last_rel = instr;

	return reg;
}
280
/* Add a src register (component 'chan') to 'instr', mapping the TGSI
 * file/index to an ir3 register number plus modifier flags.  Also sets
 * any needed (ss)/(sy) sync flags on the consuming instruction, and
 * tracks it as last_rel when indirect addressing is used.
 */
static struct ir3_register *
add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_src_register *src, unsigned chan)
{
	unsigned flags = 0, num = 0;
	struct ir3_register *reg;

	/* TODO we need to use a mov to temp for const >= 64.. or maybe
	 * we could use relative addressing..
	 */
	compile_assert(ctx, src->Index < 64);

	switch (src->File) {
	case TGSI_FILE_IMMEDIATE:
		/* TODO if possible, use actual immediate instead of const.. but
		 * TGSI has vec4 immediates, we can only embed scalar (of limited
		 * size, depending on instruction..)
		 */
	case TGSI_FILE_CONSTANT:
		flags |= IR3_REG_CONST;
		num = src->Index + ctx->base_reg[src->File];
		break;
	case TGSI_FILE_OUTPUT:
		/* NOTE: we should only end up w/ OUTPUT file for things like
		 * clamp()'ing saturated dst instructions
		 */
	case TGSI_FILE_INPUT:
	case TGSI_FILE_TEMPORARY:
		num = src->Index + ctx->base_reg[src->File];
		break;
	default:
		compile_error(ctx, "unsupported src register file: %s\n",
			tgsi_file_name(src->File));
		break;
	}

	if (src->Absolute)
		flags |= IR3_REG_ABS;
	if (src->Negate)
		flags |= IR3_REG_NEGATE;
	if (src->Indirect)
		flags |= IR3_REG_RELATIV;
	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	reg = ir3_reg_create(instr, regid(num, chan), flags);

	if (src->Indirect)
		ctx->last_rel = instr;

	/* consuming instr may need to wait on pending writes to this reg: */
	instr->flags |= src_flags(ctx, reg);

	return reg;
}
335
/* Construct a src register referencing the same register as 'dst',
 * with no abs/neg modifiers and an identity (xyzw) swizzle.  Only the
 * listed fields are written; other fields of *src are left untouched.
 */
static void
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
{
	src->File      = dst->File;
	src->Indirect  = dst->Indirect;
	src->Dimension = dst->Dimension;
	src->Index     = dst->Index;
	src->Absolute  = 0;
	src->Negate    = 0;
	src->SwizzleX  = TGSI_SWIZZLE_X;
	src->SwizzleY  = TGSI_SWIZZLE_Y;
	src->SwizzleZ  = TGSI_SWIZZLE_Z;
	src->SwizzleW  = TGSI_SWIZZLE_W;
}
350
/* Get internal-temp src/dst to use for a sequence of instructions
 * generated by a single TGSI op.
 *
 * Temps are allocated past the shader's own TGSI temporaries.  The
 * num_internal_temps counter presumably resets between TGSI ops --
 * confirm in the instruction dispatch loop (not visible in this chunk).
 */
static struct tgsi_src_register *
get_internal_temp(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	tmp_dst->File = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* place it just past the shader's own temporaries: */
	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
377
/* Get internal half-precision temp src/dst to use for a sequence of
 * instructions generated by a single TGSI op.
 *
 * In half_precision mode every reg is already half, so just defer to
 * get_internal_temp().  Otherwise hand out hr0 (note: callers must set
 * IR3_REG_HALF on the resulting ir3 registers themselves).
 */
static struct tgsi_src_register *
get_internal_temp_hr(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	if (ctx->so->half_precision)
		return get_internal_temp(ctx, tmp_dst);

	tmp_dst->File = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* just use hr0 because no one else should be using half-
	 * precision regs:
	 */
	tmp_dst->Index = 0;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
410
411 static inline bool
412 is_const(struct tgsi_src_register *src)
413 {
414 return (src->File == TGSI_FILE_CONSTANT) ||
415 (src->File == TGSI_FILE_IMMEDIATE);
416 }
417
418 static inline bool
419 is_relative(struct tgsi_src_register *src)
420 {
421 return src->Indirect;
422 }
423
/* true if src is indirectly addressed, or a const/immediate */
static inline bool
is_rel_or_const(struct tgsi_src_register *src)
{
	if (is_relative(src))
		return true;
	return is_const(src);
}
429
430 static type_t
431 get_ftype(struct fd3_compile_context *ctx)
432 {
433 return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
434 }
435
436 static type_t
437 get_utype(struct fd3_compile_context *ctx)
438 {
439 return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
440 }
441
442 static unsigned
443 src_swiz(struct tgsi_src_register *src, int chan)
444 {
445 switch (chan) {
446 case 0: return src->SwizzleX;
447 case 1: return src->SwizzleY;
448 case 2: return src->SwizzleZ;
449 case 3: return src->SwizzleW;
450 }
451 assert(0);
452 return 0;
453 }
454
455 /* for instructions that cannot take a const register as src, if needed
456 * generate a move to temporary gpr:
457 */
458 static struct tgsi_src_register *
459 get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
460 {
461 struct tgsi_dst_register tmp_dst;
462 struct tgsi_src_register *tmp_src;
463
464 compile_assert(ctx, is_rel_or_const(src));
465
466 tmp_src = get_internal_temp(ctx, &tmp_dst);
467
468 create_mov(ctx, &tmp_dst, src);
469
470 return tmp_src;
471 }
472
/* Get a src register referencing a scalar immediate value: reuse an
 * existing slot in the shader's vec4 immediate table if the value (or
 * its integer negation, exposed via the Negate modifier) is already
 * present, otherwise append it.
 */
static void
get_immediate(struct fd3_compile_context *ctx,
		struct tgsi_src_register *reg, uint32_t val)
{
	unsigned neg, swiz, idx, i;
	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
	static const unsigned swiz2tgsi[] = {
			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
	};

	for (i = 0; i < ctx->immediate_idx; i++) {
		swiz = i % 4;
		idx = i / 4;

		if (ctx->so->immediates[idx].val[swiz] == val) {
			neg = 0;
			break;
		}

		/* NOTE(review): '-val' negates the raw integer bit pattern,
		 * which is not an IEEE sign flip for float immediates (fui()
		 * bits) -- this reuse path appears to only make sense for
		 * integer-style immediates; confirm intent.
		 */
		if (ctx->so->immediates[idx].val[swiz] == -val) {
			neg = 1;
			break;
		}
	}

	if (i == ctx->immediate_idx) {
		/* need to generate a new immediate: */
		swiz = i % 4;
		idx = i / 4;
		neg = 0;
		ctx->so->immediates[idx].val[swiz] = val;
		ctx->so->immediates_count = idx + 1;
		ctx->immediate_idx++;
	}

	reg->File = TGSI_FILE_IMMEDIATE;
	reg->Indirect = 0;
	reg->Dimension = 0;
	reg->Index = idx;
	reg->Absolute = 0;
	reg->Negate = neg;
	/* broadcast the scalar slot across all four swizzle channels: */
	reg->SwizzleX = swiz2tgsi[swiz];
	reg->SwizzleY = swiz2tgsi[swiz];
	reg->SwizzleZ = swiz2tgsi[swiz];
	reg->SwizzleW = swiz2tgsi[swiz];
}
519
/* Emit a per-channel scalar mov sequence for the enabled WriteMask
 * channels of dst.  mov cannot encode abs/neg src modifiers, so
 * absneg.f is substituted for those cases.  Disabled channels get a
 * nop, keeping the fixed 4-slot-per-TGSI-op cadence (cf. vectorize()).
 */
static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
		struct tgsi_src_register *src)
{
	type_t type_mov = get_ftype(ctx);
	unsigned i;

	for (i = 0; i < 4; i++) {
		/* move to destination: */
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *instr;

			if (src->Absolute || src->Negate) {
				/* can't have abs or neg on a mov instr, so use
				 * absneg.f instead to handle these cases:
				 */
				instr = instr_create(ctx, 2, OPC_ABSNEG_F);
			} else {
				instr = instr_create(ctx, 1, 0);
				instr->cat1.src_type = type_mov;
				instr->cat1.dst_type = type_mov;
			}

			add_dst_reg(ctx, instr, dst, i);
			add_src_reg(ctx, instr, src, src_swiz(src, i));
		} else {
			/* pad disabled channel to keep the 4-slot cadence: */
			add_nop(ctx, 1);
		}
	}
}
550
551 static void
552 create_clamp(struct fd3_compile_context *ctx,
553 struct tgsi_dst_register *dst, struct tgsi_src_register *val,
554 struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
555 {
556 struct ir3_instruction *instr;
557
558 instr = instr_create(ctx, 2, OPC_MAX_F);
559 vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
560
561 instr = instr_create(ctx, 2, OPC_MIN_F);
562 vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
563 }
564
565 static void
566 create_clamp_imm(struct fd3_compile_context *ctx,
567 struct tgsi_dst_register *dst,
568 uint32_t minval, uint32_t maxval)
569 {
570 struct tgsi_src_register minconst, maxconst;
571 struct tgsi_src_register src;
572
573 src_from_dst(&src, dst);
574
575 get_immediate(ctx, &minconst, minval);
576 get_immediate(ctx, &maxconst, maxval);
577
578 create_clamp(ctx, dst, &src, &minconst, &maxconst);
579 }
580
/* Return the dst register to emit into.  If any src of 'inst' aliases
 * the dst (and could therefore read a channel after it has been
 * overwritten by an earlier scalar op of the vector sequence), an
 * internal temporary is substituted; put_dst() later movs it back.
 * A full-writemask dst read with identity swizzle is safe as-is.
 */
static struct tgsi_dst_register *
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	unsigned i;
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		struct tgsi_src_register *src = &inst->Src[i].Register;
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
			if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
					(src->SwizzleX == TGSI_SWIZZLE_X) &&
					(src->SwizzleY == TGSI_SWIZZLE_Y) &&
					(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
					(src->SwizzleW == TGSI_SWIZZLE_W))
				continue;
			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
			ctx->tmp_dst.WriteMask = dst->WriteMask;
			dst = &ctx->tmp_dst;
			break;
		}
	}
	return dst;
}
603
604 static void
605 put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
606 struct tgsi_dst_register *dst)
607 {
608 /* if necessary, add mov back into original dst: */
609 if (dst != &inst->Dst[0].Register) {
610 create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
611 }
612 }
613
614 /* helper to generate the necessary repeat and/or additional instructions
615 * to turn a scalar instruction into a vector operation:
616 */
617 static void
618 vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
619 struct tgsi_dst_register *dst, int nsrcs, ...)
620 {
621 va_list ap;
622 int i, j, n = 0;
623 bool indirect = dst->Indirect;
624
625 add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);
626
627 va_start(ap, nsrcs);
628 for (j = 0; j < nsrcs; j++) {
629 struct tgsi_src_register *src =
630 va_arg(ap, struct tgsi_src_register *);
631 unsigned flags = va_arg(ap, unsigned);
632 struct ir3_register *reg;
633 if (flags & IR3_REG_IMMED) {
634 reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
635 /* this is an ugly cast.. should have put flags first! */
636 reg->iim_val = *(int *)&src;
637 } else {
638 reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
639 indirect |= src->Indirect;
640 }
641 reg->flags |= flags & ~IR3_REG_NEGATE;
642 if (flags & IR3_REG_NEGATE)
643 reg->flags ^= IR3_REG_NEGATE;
644 }
645 va_end(ap);
646
647 for (i = 0; i < 4; i++) {
648 if (dst->WriteMask & (1 << i)) {
649 struct ir3_instruction *cur;
650
651 if (n++ == 0) {
652 cur = instr;
653 } else {
654 cur = ir3_instr_clone(instr);
655 cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
656 }
657
658 /* fix-up dst register component: */
659 cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);
660
661 /* fix-up src register component: */
662 va_start(ap, nsrcs);
663 for (j = 0; j < nsrcs; j++) {
664 struct tgsi_src_register *src =
665 va_arg(ap, struct tgsi_src_register *);
666 unsigned flags = va_arg(ap, unsigned);
667 if (!(flags & IR3_REG_IMMED)) {
668 cur->regs[j+1]->num =
669 regid(cur->regs[j+1]->num >> 2,
670 src_swiz(src, i));
671 cur->flags |= src_flags(ctx, cur->regs[j+1]);
672 }
673 }
674 va_end(ap);
675
676 if (indirect)
677 ctx->last_rel = cur;
678 }
679 }
680
681 /* pad w/ nop's.. at least until we are clever enough to
682 * figure out if we really need to..
683 */
684 add_nop(ctx, 4 - n);
685 }
686
687 /*
688 * Handlers for TGSI instructions which do not have a 1:1 mapping to
689 * native instructions:
690 */
691
692 static void
693 trans_clamp(const struct instr_translater *t,
694 struct fd3_compile_context *ctx,
695 struct tgsi_full_instruction *inst)
696 {
697 struct tgsi_dst_register *dst = get_dst(ctx, inst);
698 struct tgsi_src_register *src0 = &inst->Src[0].Register;
699 struct tgsi_src_register *src1 = &inst->Src[1].Register;
700 struct tgsi_src_register *src2 = &inst->Src[2].Register;
701
702 create_clamp(ctx, dst, src0, src1, src2);
703
704 put_dst(ctx, inst, dst);
705 }
706
/* ARL(x) = x, but mova from hrN.x to a0..
 *
 * The float src is converted to s16, pre-scaled by 4 (components per
 * vec4 reg) via shl, then moved into a0.  Trailing nops cover hw
 * latency between each step and before a0 may be consumed.
 */
static void
trans_arl(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	unsigned chan = src->SwizzleX;
	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);

	/* writing a0 invalidates any outstanding relative addressing: */
	handle_last_rel(ctx);

	tmp_src = get_internal_temp_hr(ctx, &tmp_dst);

	/* cov.{f32,f16}s16 Rtmp, Rsrc */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = get_ftype(ctx);
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, src, chan);

	add_nop(ctx, 3);

	/* shl.b Rtmp, Rtmp, 2 */
	instr = instr_create(ctx, 2, OPC_SHL_B);
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;

	add_nop(ctx, 3);

	/* mova a0, Rtmp */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = TYPE_S16;
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;

	/* need to ensure 5 instr slots before a0 is used: */
	add_nop(ctx, 6);
}
752
753 /* texture fetch/sample instructions: */
754 static void
755 trans_samp(const struct instr_translater *t,
756 struct fd3_compile_context *ctx,
757 struct tgsi_full_instruction *inst)
758 {
759 struct ir3_register *r;
760 struct ir3_instruction *instr;
761 struct tgsi_src_register *coord = &inst->Src[0].Register;
762 struct tgsi_src_register *samp = &inst->Src[1].Register;
763 unsigned tex = inst->Texture.Texture;
764 int8_t *order;
765 unsigned i, flags = 0, src_wrmask;
766 bool needs_mov = false;
767
768 switch (t->arg) {
769 case TGSI_OPCODE_TEX:
770 if (tex == TGSI_TEXTURE_2D) {
771 order = (int8_t[4]){ 0, 1, -1, -1 };
772 src_wrmask = TGSI_WRITEMASK_XY;
773 } else {
774 order = (int8_t[4]){ 0, 1, 2, -1 };
775 src_wrmask = TGSI_WRITEMASK_XYZ;
776 }
777 break;
778 case TGSI_OPCODE_TXP:
779 if (tex == TGSI_TEXTURE_2D) {
780 order = (int8_t[4]){ 0, 1, 3, -1 };
781 src_wrmask = TGSI_WRITEMASK_XYZ;
782 } else {
783 order = (int8_t[4]){ 0, 1, 2, 3 };
784 src_wrmask = TGSI_WRITEMASK_XYZW;
785 }
786 flags |= IR3_INSTR_P;
787 break;
788 default:
789 compile_assert(ctx, 0);
790 break;
791 }
792
793 if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) {
794 add_nop(ctx, 3);
795 flags |= IR3_INSTR_3D;
796 }
797
798 /* cat5 instruction cannot seem to handle const or relative: */
799 if (is_rel_or_const(coord))
800 needs_mov = true;
801
802 /* The texture sample instructions need to coord in successive
803 * registers/components (ie. src.xy but not src.yx). And TXP
804 * needs the .w component in .z for 2D.. so in some cases we
805 * might need to emit some mov instructions to shuffle things
806 * around:
807 */
808 for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
809 if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
810 needs_mov = true;
811
812 if (needs_mov) {
813 struct tgsi_dst_register tmp_dst;
814 struct tgsi_src_register *tmp_src;
815 unsigned j;
816
817 type_t type_mov = get_ftype(ctx);
818
819 /* need to move things around: */
820 tmp_src = get_internal_temp(ctx, &tmp_dst);
821
822 for (j = 0; (j < 4) && (order[j] >= 0); j++) {
823 instr = instr_create(ctx, 1, 0);
824 instr->cat1.src_type = type_mov;
825 instr->cat1.dst_type = type_mov;
826 add_dst_reg(ctx, instr, &tmp_dst, j);
827 add_src_reg(ctx, instr, coord,
828 src_swiz(coord, order[j]));
829 }
830
831 coord = tmp_src;
832
833 add_nop(ctx, 4 - j);
834 }
835
836 instr = instr_create(ctx, 5, t->opc);
837 instr->cat5.type = get_ftype(ctx);
838 instr->cat5.samp = samp->Index;
839 instr->cat5.tex = samp->Index;
840 instr->flags |= flags;
841
842 r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
843 r->wrmask = inst->Dst[0].Register.WriteMask;
844
845 add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask;
846
847 /* after add_src_reg() so we don't set (sy) on sam instr itself! */
848 regmask_set(&ctx->needs_sy, r);
849 }
850
851 /*
852 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
853 * cmps.f.eq tmp0, b, a
854 * cov.u16f16 dst, tmp0
855 *
856 * SNE(a,b) = (a != b) ? 1.0 : 0.0
857 * cmps.f.eq tmp0, b, a
858 * add.s tmp0, tmp0, -1
859 * sel.f16 dst, {0.0}, tmp0, {1.0}
860 *
861 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
862 * cmps.f.ge tmp0, a, b
863 * cov.u16f16 dst, tmp0
864 *
865 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
866 * cmps.f.ge tmp0, b, a
867 * cov.u16f16 dst, tmp0
868 *
869 * SGT(a,b) = (a > b) ? 1.0 : 0.0
870 * cmps.f.ge tmp0, b, a
871 * add.s tmp0, tmp0, -1
872 * sel.f16 dst, {0.0}, tmp0, {1.0}
873 *
874 * SLT(a,b) = (a < b) ? 1.0 : 0.0
875 * cmps.f.ge tmp0, a, b
876 * add.s tmp0, tmp0, -1
877 * sel.f16 dst, {0.0}, tmp0, {1.0}
878 *
879 * CMP(a,b,c) = (a < 0.0) ? b : c
880 * cmps.f.ge tmp0, a, {0.0}
881 * add.s tmp0, tmp0, -1
882 * sel.f16 dst, c, tmp0, b
883 */
/* Translate TGSI set-on-condition (SEQ/SNE/SGE/SLE/SGT/SLT) and CMP
 * into cmps.f + cov/sel sequences (see the expansion table in the
 * comment above this function).
 */
static void
trans_cmp(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_src_register constval0, constval1;
	/* final instruction for CMP() uses orig src1 and src2: */
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *a0, *a1;
	unsigned condition;

	tmp_src = get_internal_temp(ctx, &tmp_dst);

	/* pick cmps operand order/condition per opcode (a0 <op> a1): */
	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SNE:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_EQ;
		break;
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLT:
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &inst->Src[1].Register;  /* b */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_SLE:
	case TGSI_OPCODE_SGT:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_CMP:
		get_immediate(ctx, &constval0, fui(0.0));
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &constval0;              /* {0.0} */
		condition = IR3_COND_GE;
		break;
	default:
		compile_assert(ctx, 0);
		return;
	}

	/* cat2 can't take two const srcs, legalize one through a temp: */
	if (is_const(a0) && is_const(a1))
		a0 = get_unconst(ctx, a0);

	/* cmps.f.ge tmp, a0, a1 */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	instr->cat2.condition = condition;
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);

	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLE:
		/* cov.u16f16 dst, tmp0 */
		instr = instr_create(ctx, 1, 0);
		instr->cat1.src_type = get_utype(ctx);
		instr->cat1.dst_type = get_ftype(ctx);
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
		break;
	case TGSI_OPCODE_SNE:
	case TGSI_OPCODE_SGT:
	case TGSI_OPCODE_SLT:
	case TGSI_OPCODE_CMP:
		/* add.s tmp, tmp, -1 */
		instr = instr_create(ctx, 2, OPC_ADD_S);
		vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);

		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
			/* sel.{f32,f16} dst, src2, tmp, src1 */
			instr = instr_create(ctx, 3,
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&inst->Src[2].Register, 0,
					tmp_src, 0,
					&inst->Src[1].Register, 0);
		} else {
			get_immediate(ctx, &constval0, fui(0.0));
			get_immediate(ctx, &constval1, fui(1.0));
			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
			instr = instr_create(ctx, 3,
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&constval0, 0, tmp_src, 0, &constval1, 0);
		}

		break;
	}

	put_dst(ctx, inst, dst);
}
979
980 /*
981 * Conditional / Flow control
982 */
983
984 static unsigned
985 find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
986 {
987 unsigned i;
988 for (i = 0; i < ctx->ir->instrs_count; i++)
989 if (ctx->ir->instrs[i] == instr)
990 return i;
991 return ~0;
992 }
993
994 static void
995 push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
996 {
997 ctx->branch[ctx->branch_count++] = instr;
998 }
999
/* Pop the innermost pending branch and patch its target.  A (jp) nop
 * is emitted at the current position to serve as the branch target.
 */
static void
pop_branch(struct fd3_compile_context *ctx)
{
	struct ir3_instruction *instr;

	/* if we were clever enough, we'd patch this up after the fact,
	 * and set (jp) flag on whatever the next instruction was, rather
	 * than inserting an extra nop..
	 */
	instr = instr_create(ctx, 0, OPC_NOP);
	instr->flags |= IR3_INSTR_JP;

	/* pop the branch instruction from the stack and fix up branch target: */
	instr = ctx->branch[--ctx->branch_count];
	/* cat0 immed is an instruction-relative offset; this lands on the
	 * (jp) nop just emitted:
	 */
	instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
}
1016
/* We probably don't really want to translate if/else/endif into branches..
 * the blob driver evaluates both legs of the if and then uses the sel
 * instruction to pick which sides of the branch to "keep".. but figuring
 * that out will take somewhat more compiler smarts.  So hopefully branches
 * don't kill performance too badly.
 */
static void
trans_if(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	struct tgsi_src_register constval;

	get_immediate(ctx, &constval, fui(0.0));

	/* the {0.0} immediate already occupies the one const src slot a
	 * cat2 op can take (cf. instr_cat2()), so legalize src if const:
	 */
	if (is_const(src))
		src = get_unconst(ctx, src);

	/* cmps.f.eq p0.x, src.x, {0.0} -- predicate set when the IF
	 * condition is *false* (src == 0.0):
	 */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
	add_src_reg(ctx, instr, src, src->SwizzleX);
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
	instr->cat2.condition = IR3_COND_EQ;

	/* branch over the if-body; target patched later by pop_branch()
	 * at the matching ELSE/ENDIF:
	 */
	instr = instr_create(ctx, 0, OPC_BR);
	push_branch(ctx, instr);
}
1046
1047 static void
1048 trans_else(const struct instr_translater *t,
1049 struct fd3_compile_context *ctx,
1050 struct tgsi_full_instruction *inst)
1051 {
1052 struct ir3_instruction *instr;
1053
1054 /* for first half of if/else/endif, generate a jump past the else: */
1055 instr = instr_create(ctx, 0, OPC_JUMP);
1056
1057 pop_branch(ctx);
1058 push_branch(ctx, instr);
1059 }
1060
/* ENDIF: patch the pending branch (from IF or ELSE) to land here. */
static void
trans_endif(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	pop_branch(ctx);
}
1068
1069 /*
1070 * Handlers for TGSI instructions which do have 1:1 mapping to native
1071 * instructions:
1072 */
1073
1074 static void
1075 instr_cat0(const struct instr_translater *t,
1076 struct fd3_compile_context *ctx,
1077 struct tgsi_full_instruction *inst)
1078 {
1079 instr_create(ctx, 0, t->opc);
1080 }
1081
/* TGSI MOV: expands to a per-channel mov sequence, except a negated
 * src (which mov cannot encode) is lowered to add.f with +0.0.
 */
static void
instr_cat1(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src = &inst->Src[0].Register;

	/* mov instructions can't handle a negate on src: */
	if (src->Negate) {
		struct tgsi_src_register constval;
		struct ir3_instruction *instr;

		/* since right now, we are using uniformly either TYPE_F16 or
		 * TYPE_F32, and we don't utilize the conversion possibilities
		 * of mov instructions, we can get away with substituting an
		 * add.f which can handle negate.  Might need to revisit this
		 * in the future if we start supporting widening/narrowing or
		 * conversion to/from integer..
		 */
		instr = instr_create(ctx, 2, OPC_ADD_F);
		get_immediate(ctx, &constval, fui(0.0));
		vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
	} else {
		create_mov(ctx, dst, src);
		/* create_mov() generates vector sequence, so no vectorize() */
	}
	put_dst(ctx, inst, dst);
}
1111
/* Emit a category-2 (general ALU) instruction.  Most cat2 opcodes take
 * two sources, but a number are single-source; both shapes are handled
 * here, selected by the native opcode.
 */
static void
instr_cat2(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
	struct ir3_instruction *instr;
	unsigned src0_flags = 0, src1_flags = 0;

	/* some tgsi opcodes are realized via a src modifier on a simpler
	 * native instruction: ABS(x) -> absneg.f |x|, SUB(a,b) -> add.f a, -b
	 */
	switch (t->tgsi_opc) {
	case TGSI_OPCODE_ABS:
		src0_flags = IR3_REG_ABS;
		break;
	case TGSI_OPCODE_SUB:
		src1_flags = IR3_REG_NEGATE;
		break;
	}

	switch (t->opc) {
	case OPC_ABSNEG_F:
	case OPC_ABSNEG_S:
	case OPC_CLZ_B:
	case OPC_CLZ_S:
	case OPC_SIGN_F:
	case OPC_FLOOR_F:
	case OPC_CEIL_F:
	case OPC_RNDNE_F:
	case OPC_RNDAZ_F:
	case OPC_TRUNC_F:
	case OPC_NOT_B:
	case OPC_BFREV_B:
	case OPC_SETRM:
	case OPC_CBITS_B:
		/* these only have one src reg */
		instr = instr_create(ctx, 2, t->opc);
		vectorize(ctx, instr, dst, 1, src0, src0_flags);
		break;
	default:
		/* cat2 can't encode two const srcs in one instruction, so
		 * move src0 through a temp register first if needed:
		 */
		if (is_const(src0) && is_const(src1))
			src0 = get_unconst(ctx, src0);

		instr = instr_create(ctx, 2, t->opc);
		vectorize(ctx, instr, dst, 2, src0, src0_flags,
				src1, src1_flags);
		break;
	}

	put_dst(ctx, inst, dst);
}
1163
1164 static bool is_mad(opc_t opc)
1165 {
1166 switch (opc) {
1167 case OPC_MAD_U16:
1168 case OPC_MADSH_U16:
1169 case OPC_MAD_S16:
1170 case OPC_MADSH_M16:
1171 case OPC_MAD_U24:
1172 case OPC_MAD_S24:
1173 case OPC_MAD_F16:
1174 case OPC_MAD_F32:
1175 return true;
1176 default:
1177 return false;
1178 }
1179 }
1180
/* Emit a category-3 (three-source ALU, e.g. mad) instruction. */
static void
instr_cat3(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
	struct ir3_instruction *instr;

	/* in particular, can't handle const for src1 for cat3..
	 * for mad, we can swap first two src's if needed:
	 */
	if (is_rel_or_const(src1)) {
		if (is_mad(t->opc) && !is_rel_or_const(src0)) {
			/* mad's first two srcs are commutative, so a plain swap
			 * avoids the extra mov that get_unconst() would cost:
			 */
			struct tgsi_src_register *tmp;
			tmp = src0;
			src0 = src1;
			src1 = tmp;
		} else {
			src1 = get_unconst(ctx, src1);
		}
	}

	/* pick the half-precision variant of the opcode when the shader
	 * is compiled for half precision:
	 */
	instr = instr_create(ctx, 3,
			ctx->so->half_precision ? t->hopc : t->opc);
	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
			&inst->Src[2].Register, 0);
	put_dst(ctx, inst, dst);
}
1211
1212 static void
1213 instr_cat4(const struct instr_translater *t,
1214 struct fd3_compile_context *ctx,
1215 struct tgsi_full_instruction *inst)
1216 {
1217 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1218 struct tgsi_src_register *src = &inst->Src[0].Register;
1219 struct ir3_instruction *instr;
1220 unsigned i, n;
1221
1222 /* seems like blob compiler avoids const as src.. */
1223 if (is_const(src))
1224 src = get_unconst(ctx, src);
1225
1226 /* worst case: */
1227 add_nop(ctx, 6);
1228
1229 /* we need to replicate into each component: */
1230 for (i = 0, n = 0; i < 4; i++) {
1231 if (dst->WriteMask & (1 << i)) {
1232 if (n++)
1233 add_nop(ctx, 1);
1234 instr = instr_create(ctx, 4, t->opc);
1235 add_dst_reg(ctx, instr, dst, i);
1236 add_src_reg(ctx, instr, src, src->SwizzleX);
1237 }
1238 }
1239
1240 regmask_set(&ctx->needs_ss, instr->regs[0]);
1241 put_dst(ctx, inst, dst);
1242 }
1243
/* Table mapping TGSI opcodes to their translation handler plus any
 * per-opcode data (native opcode, half-precision variant, extra arg).
 * Entries not listed translate as all-zero (fxn == NULL), which
 * compile_instructions() reports as an unknown opcode.
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }

	INSTR(MOV,          instr_cat1),
	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(SUB,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
	INSTR(CLAMP,        trans_clamp),
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
	INSTR(ROUND,        instr_cat2, .opc = OPC_RNDNE_F),
	INSTR(ARL,          trans_arl),
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
	INSTR(COS,          instr_cat4, .opc = OPC_COS),
	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
	INSTR(SGT,          trans_cmp),
	INSTR(SLT,          trans_cmp),
	INSTR(SGE,          trans_cmp),
	INSTR(SLE,          trans_cmp),
	INSTR(SNE,          trans_cmp),
	INSTR(SEQ,          trans_cmp),
	INSTR(CMP,          trans_cmp),
	INSTR(IF,           trans_if),
	INSTR(ELSE,         trans_else),
	INSTR(ENDIF,        trans_endif),
	INSTR(END,          instr_cat0, .opc = OPC_END),
	INSTR(KILL,         instr_cat0, .opc = OPC_KILL),
};
1283
/* Pack a tgsi semantic name+index into the compact fd3_semantic encoding
 * used for shader input/output linkage.
 */
static fd3_semantic
decl_semantic(const struct tgsi_declaration_semantic *sem)
{
	return fd3_semantic_name(sem->Name, sem->Index);
}
1289
/* Handle a TGSI input declaration: record the input(s) in the shader
 * state object and, for fragment shaders, emit the bary.f instructions
 * that fetch the interpolated values.
 *
 * Returns the number of nop's needed before the first real instruction
 * (non-zero for frag shaders, to cover bary.f latency).
 */
static int
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
	unsigned i, flags = 0;
	int nop = 0;

	/* I don't think we should get frag shader input without
	 * semantic info? Otherwise how do inputs get linked to
	 * vert outputs?
	 */
	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
			decl->Declaration.Semantic);

	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->inputs_count++;
		unsigned r = regid(i + base, 0);
		unsigned ncomp;

		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
		ncomp = 4;

		DBG("decl in -> r%d", i + base); // XXX

		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
		so->inputs[n].compmask = (1 << ncomp) - 1;
		so->inputs[n].regid = r;
		so->inputs[n].inloc = ctx->next_inloc;
		ctx->next_inloc += ncomp;

		so->total_in += ncomp;

		/* for frag shaders, we need to generate the corresponding bary instr: */
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			unsigned j;

			/* one bary.f per component of the input: */
			for (j = 0; j < ncomp; j++) {
				struct ir3_instruction *instr;
				struct ir3_register *dst;

				instr = instr_create(ctx, 2, OPC_BARY_F);

				/* dst register: */
				dst = ir3_reg_create(instr, r + j, flags);
				/* remember the last input reg, so the end-of-input
				 * flag (IR3_REG_EI) can be set on it later:
				 */
				ctx->last_input = dst;

				/* input position: */
				/* NOTE(review): the "- 8" bias appears to convert inloc
				 * into the encoding the hw expects — confirm against the
				 * a3xx ISA docs.
				 */
				ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
						so->inputs[n].inloc + j - 8;

				/* input base (always r0.xy): */
				ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3;
			}

			/* bary.f results need time before they can be consumed: */
			nop = 6;
		}
	}

	return nop;
}
1354
/* Handle a TGSI output declaration: validate the semantic for the shader
 * stage, note position writes, and record the output register mapping.
 */
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
	unsigned comp = 0;
	unsigned name = decl->Semantic.Name;
	unsigned i;

	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?

	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX

	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			so->writes_pos = true;
			/* fallthrough */
		case TGSI_SEMANTIC_PSIZE:
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_GENERIC:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
			break;
		default:
			compile_error(ctx, "unknown VS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	} else {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			comp = 2;  /* tgsi will write to .z component */
			so->writes_pos = true;
			/* fallthrough */
		case TGSI_SEMANTIC_COLOR:
			break;
		default:
			compile_error(ctx, "unknown FS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	}

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->outputs_count++;
		so->outputs[n].semantic = decl_semantic(&decl->Semantic);
		so->outputs[n].regid = regid(i + base, comp);
	}
}
1403
1404 static void
1405 decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
1406 {
1407 ctx->so->samplers_count++;
1408 }
1409
/* Main translation loop: walk the TGSI token stream, dispatching
 * declarations, immediates, and instructions, then apply the final
 * fixups (sync flags on the first instruction, end-of-input marker,
 * pending relative-addressing state).
 */
static void
compile_instructions(struct fd3_compile_context *ctx)
{
	struct ir3_shader *ir = ctx->ir;
	int nop = 0;

	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				/* decl_in() may request nop's before the first real
				 * instruction (e.g. to cover bary.f latency):
				 */
				nop = decl_in(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
				decl_samp(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			/* 16 bytes == one vec4 immediate */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			/* flush any nop's requested by an earlier declaration: */
			add_nop(ctx, nop);
			nop = 0;

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				/* internal temps are per-instruction scratch: */
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* implement the tgsi saturate modifier as a clamp on dst: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			break;
		}
		default:
			break;
		}
	}

	/* first instruction must wait on any outstanding loads/stores: */
	if (ir->instrs_count > 0)
		ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;

	/* mark the last input register as end-of-input: */
	if (ctx->last_input)
		ctx->last_input->flags |= IR3_REG_EI;

	handle_last_rel(ctx);
}
1486
1487 int
1488 fd3_compile_shader(struct fd3_shader_stateobj *so,
1489 const struct tgsi_token *tokens)
1490 {
1491 struct fd3_compile_context ctx;
1492
1493 assert(!so->ir);
1494
1495 so->ir = ir3_shader_create();
1496
1497 assert(so->ir);
1498
1499 if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
1500 return -1;
1501
1502 compile_instructions(&ctx);
1503
1504 compile_free(&ctx);
1505
1506 return 0;
1507 }