2c32c0fa2a7602c8d964f5d431aa02e3b1cfa295
[mesa.git] / src / gallium / drivers / freedreno / a3xx / fd3_compiler.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include <stdarg.h>
30
31 #include "pipe/p_state.h"
32 #include "util/u_string.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_ureg.h"
37 #include "tgsi/tgsi_info.h"
38 #include "tgsi/tgsi_strings.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_scan.h"
41
42 #include "fd3_compiler.h"
43 #include "fd3_program.h"
44 #include "fd3_util.h"
45
46 #include "instr-a3xx.h"
47 #include "ir-a3xx.h"
48
49
/* Per-shader compile state: TGSI parse progress plus the register,
 * immediate, and sync-flag bookkeeping needed while emitting ir3.
 */
struct fd3_compile_context {
	const struct tgsi_token *tokens;
	struct ir3_shader *ir;
	struct fd3_shader_stateobj *so;

	struct tgsi_parse_context parser;
	unsigned type;     /* processor type (vertex/fragment) from TGSI header */

	struct tgsi_shader_info info;

	/* last input dst (for setting (ei) flag): */
	struct ir3_register *last_input;

	/* last instruction with relative addressing: */
	struct ir3_instruction *last_rel;

	/* for calculating input/output positions/linkages: */
	unsigned next_inloc;

	unsigned num_internal_temps;
	struct tgsi_src_register internal_temps[6];

	/* track registers which need to synchronize w/ "complex alu" cat3
	 * instruction pipeline:
	 */
	regmask_t needs_ss;

	/* track registers which need to synchronize with texture fetch
	 * pipeline:
	 */
	regmask_t needs_sy;

	/* inputs start at r0, temporaries start after last input, and
	 * outputs start after last temporary.
	 *
	 * We could be more clever, because this is not a hw restriction,
	 * but probably best just to implement an optimizing pass to
	 * reduce the # of registers used and get rid of redundant mov's
	 * (to output register).
	 */
	unsigned base_reg[TGSI_FILE_COUNT];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	/* stack of branch instructions that start (potentially nested)
	 * branches, so that we can fix up the branch target on the
	 * corresponding END instruction
	 */
	struct ir3_instruction *branch[16];
	unsigned int branch_count;

	/* used when dst is same as one of the src, to avoid overwriting a
	 * src element before the remaining scalar instructions that make
	 * up the vector operation
	 */
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
};
110
111
112 static void vectorize(struct fd3_compile_context *ctx,
113 struct ir3_instruction *instr, struct tgsi_dst_register *dst,
114 int nsrcs, ...);
115 static void create_mov(struct fd3_compile_context *ctx,
116 struct tgsi_dst_register *dst, struct tgsi_src_register *src);
117
/* Initialize compile state for one shader: scan the TGSI, lay out the
 * flat register space (constants then immediates; inputs, then outputs,
 * then temporaries), and start the TGSI parser.  Returns TGSI_PARSE_OK
 * on success or the tgsi_parse_init() error code.
 */
static unsigned
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
		const struct tgsi_token *tokens)
{
	unsigned ret, base = 0;
	struct tgsi_shader_info *info = &ctx->info;

	ctx->tokens = tokens;
	ctx->ir = so->ir;
	ctx->so = so;
	ctx->last_input = NULL;
	ctx->last_rel = NULL;
	ctx->next_inloc = 8;
	ctx->num_internal_temps = 0;
	ctx->branch_count = 0;

	regmask_init(&ctx->needs_ss);
	regmask_init(&ctx->needs_sy);
	memset(ctx->base_reg, 0, sizeof(ctx->base_reg));

	tgsi_scan_shader(tokens, &ctx->info);

	/* Immediates go after constants: */
	ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
			info->file_max[TGSI_FILE_CONSTANT] + 1;

	/* if full precision and fragment shader, don't clobber
	 * r0.x w/ bary fetch:
	 */
	if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
		base = 1;

	/* Temporaries after outputs after inputs: */
	ctx->base_reg[TGSI_FILE_INPUT] = base;
	ctx->base_reg[TGSI_FILE_OUTPUT] = base +
			info->file_max[TGSI_FILE_INPUT] + 1;
	ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
			info->file_max[TGSI_FILE_INPUT] + 1 +
			info->file_max[TGSI_FILE_OUTPUT] + 1;

	/* 4 components per vec4 immediate slot: */
	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);

	ret = tgsi_parse_init(&ctx->parser, tokens);
	if (ret != TGSI_PARSE_OK)
		return ret;

	ctx->type = ctx->parser.FullHeader.Processor.Processor;

	return ret;
}
170
/* Report a fatal compile error: print the message, dump the offending
 * TGSI for context, and abort.  Errors here mean an unsupported shader
 * or a compiler bug, not a recoverable runtime condition.
 */
static void
compile_error(struct fd3_compile_context *ctx, const char *format, ...)
{
	va_list ap;
	va_start(ap, format);
	_debug_vprintf(format, ap);
	va_end(ap);
	tgsi_dump(ctx->tokens, 0);
	assert(0);
}
181
/* Like assert(), but dumps the TGSI being compiled (via compile_error)
 * before aborting, to give context for the failure.
 */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
	} while (0)
185
/* Tear down per-shader compile state (counterpart to compile_init()). */
static void
compile_free(struct fd3_compile_context *ctx)
{
	tgsi_parse_free(&ctx->parser);
}
191
/* Table entry mapping a TGSI opcode to its translation handler and the
 * native opcode(s) to emit.
 */
struct instr_translater {
	/* handler which emits the native instruction(s) for 'inst': */
	void (*fxn)(const struct instr_translater *t,
			struct fd3_compile_context *ctx,
			struct tgsi_full_instruction *inst);
	unsigned tgsi_opc;   /* TGSI opcode this entry handles */
	opc_t opc;           /* native opcode to emit */
	opc_t hopc;    /* opc to use for half_precision mode, if different */
	unsigned arg;        /* extra handler-specific argument */
};
201
202 static void
203 handle_last_rel(struct fd3_compile_context *ctx)
204 {
205 if (ctx->last_rel) {
206 ctx->last_rel->flags |= IR3_INSTR_UL;
207 ctx->last_rel = NULL;
208 }
209 }
210
211 static void
212 add_nop(struct fd3_compile_context *ctx, unsigned count)
213 {
214 while (count-- > 0)
215 ir3_instr_create(ctx->ir, 0, OPC_NOP);
216 }
217
218 static unsigned
219 src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
220 {
221 unsigned flags = 0;
222
223 if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
224 return flags;
225
226 if (regmask_get(&ctx->needs_ss, reg)) {
227 flags |= IR3_INSTR_SS;
228 regmask_init(&ctx->needs_ss);
229 }
230
231 if (regmask_get(&ctx->needs_sy, reg)) {
232 flags |= IR3_INSTR_SY;
233 regmask_init(&ctx->needs_sy);
234 }
235
236 return flags;
237 }
238
/* Add the dst register to 'instr' for component 'chan', translating the
 * TGSI register index into the flat hw register space via base_reg[].
 * The TGSI ADDRESS file maps to the hw address register a0.
 */
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_dst_register *dst, unsigned chan)
{
	unsigned flags = 0, num = 0;

	switch (dst->File) {
	case TGSI_FILE_OUTPUT:
	case TGSI_FILE_TEMPORARY:
		num = dst->Index + ctx->base_reg[dst->File];
		break;
	case TGSI_FILE_ADDRESS:
		num = REG_A0;
		break;
	default:
		compile_error(ctx, "unsupported dst register file: %s\n",
			tgsi_file_name(dst->File));
		break;
	}

	/* in half-precision mode every gpr is a half-register: */
	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	return ir3_reg_create(instr, regid(num, chan), flags);
}
264
/* Add a src register to 'instr' for component 'chan', mapping the TGSI
 * file/index to the flat hw register space and translating the TGSI
 * modifiers (abs/neg/indirect) into ir3 register flags.  Also applies
 * any needed (ss)/(sy) sync flags to the instruction, and records the
 * instruction as the last user of relative addressing if applicable.
 */
static struct ir3_register *
add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_src_register *src, unsigned chan)
{
	unsigned flags = 0, num = 0;
	struct ir3_register *reg;

	/* TODO we need to use a mov to temp for const >= 64.. or maybe
	 * we could use relative addressing..
	 */
	compile_assert(ctx, src->Index < 64);

	switch (src->File) {
	case TGSI_FILE_IMMEDIATE:
		/* TODO if possible, use actual immediate instead of const.. but
		 * TGSI has vec4 immediates, we can only embed scalar (of limited
		 * size, depending on instruction..)
		 */
	case TGSI_FILE_CONSTANT:
		flags |= IR3_REG_CONST;
		num = src->Index + ctx->base_reg[src->File];
		break;
	case TGSI_FILE_OUTPUT:
		/* NOTE: we should only end up w/ OUTPUT file for things like
		 * clamp()'ing saturated dst instructions
		 */
	case TGSI_FILE_INPUT:
	case TGSI_FILE_TEMPORARY:
		num = src->Index + ctx->base_reg[src->File];
		break;
	default:
		compile_error(ctx, "unsupported src register file: %s\n",
			tgsi_file_name(src->File));
		break;
	}

	if (src->Absolute)
		flags |= IR3_REG_ABS;
	if (src->Negate)
		flags |= IR3_REG_NEGATE;
	if (src->Indirect)
		flags |= IR3_REG_RELATIV;
	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	reg = ir3_reg_create(instr, regid(num, chan), flags);

	/* remember last instruction w/ relative addressing, so the (ul)
	 * flag can be set on it later (see handle_last_rel()):
	 */
	if (src->Indirect)
		ctx->last_rel = instr;

	instr->flags |= src_flags(ctx, reg);

	return reg;
}
319
320 static void
321 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
322 {
323 src->File = dst->File;
324 src->Indirect = dst->Indirect;
325 src->Dimension = dst->Dimension;
326 src->Index = dst->Index;
327 src->Absolute = 0;
328 src->Negate = 0;
329 src->SwizzleX = TGSI_SWIZZLE_X;
330 src->SwizzleY = TGSI_SWIZZLE_Y;
331 src->SwizzleZ = TGSI_SWIZZLE_Z;
332 src->SwizzleW = TGSI_SWIZZLE_W;
333 }
334
/* Get internal-temp src/dst to use for a sequence of instructions
 * generated by a single TGSI op.  Internal temps are allocated from
 * the register space just past the shader's own temporaries.
 */
static struct tgsi_src_register *
get_internal_temp(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	tmp_dst->File      = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect  = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* index past the shader's declared temporaries: */
	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
361
/* Get internal half-precision temp src/dst to use for a sequence of
 * instructions generated by a single TGSI op.  In half_precision mode
 * all registers are already half, so fall back to the regular temp
 * allocator.
 */
static struct tgsi_src_register *
get_internal_temp_hr(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	if (ctx->so->half_precision)
		return get_internal_temp(ctx, tmp_dst);

	tmp_dst->File      = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect  = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* just use hr0 because no one else should be using half-
	 * precision regs:
	 */
	tmp_dst->Index = 0;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
394
395 static inline bool
396 is_const(struct tgsi_src_register *src)
397 {
398 return (src->File == TGSI_FILE_CONSTANT) ||
399 (src->File == TGSI_FILE_IMMEDIATE);
400 }
401
402 static inline bool
403 is_relative(struct tgsi_src_register *src)
404 {
405 return src->Indirect;
406 }
407
/* Is the src either relative-addressed or from a constant file? */
static inline bool
is_rel_or_const(struct tgsi_src_register *src)
{
	if (is_relative(src))
		return true;
	return is_const(src);
}
413
414 static type_t
415 get_ftype(struct fd3_compile_context *ctx)
416 {
417 return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
418 }
419
420 static type_t
421 get_utype(struct fd3_compile_context *ctx)
422 {
423 return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
424 }
425
426 static unsigned
427 src_swiz(struct tgsi_src_register *src, int chan)
428 {
429 switch (chan) {
430 case 0: return src->SwizzleX;
431 case 1: return src->SwizzleY;
432 case 2: return src->SwizzleZ;
433 case 3: return src->SwizzleW;
434 }
435 assert(0);
436 return 0;
437 }
438
/* for instructions that cannot take a const register as src, if needed
 * generate a move to temporary gpr:
 */
static struct tgsi_src_register *
get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
{
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;

	compile_assert(ctx, is_rel_or_const(src));

	/* copy the const/relative src into a fresh internal temp and hand
	 * back a plain gpr src referencing it:
	 */
	tmp_src = get_internal_temp(ctx, &tmp_dst);

	create_mov(ctx, &tmp_dst, src);

	return tmp_src;
}
456
457 static void
458 get_immediate(struct fd3_compile_context *ctx,
459 struct tgsi_src_register *reg, uint32_t val)
460 {
461 unsigned neg, swiz, idx, i;
462 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
463 static const unsigned swiz2tgsi[] = {
464 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
465 };
466
467 for (i = 0; i < ctx->immediate_idx; i++) {
468 swiz = i % 4;
469 idx = i / 4;
470
471 if (ctx->so->immediates[idx].val[swiz] == val) {
472 neg = 0;
473 break;
474 }
475
476 if (ctx->so->immediates[idx].val[swiz] == -val) {
477 neg = 1;
478 break;
479 }
480 }
481
482 if (i == ctx->immediate_idx) {
483 /* need to generate a new immediate: */
484 swiz = i % 4;
485 idx = i / 4;
486 neg = 0;
487 ctx->so->immediates[idx].val[swiz] = val;
488 ctx->so->immediates_count = idx + 1;
489 ctx->immediate_idx++;
490 }
491
492 reg->File = TGSI_FILE_IMMEDIATE;
493 reg->Indirect = 0;
494 reg->Dimension = 0;
495 reg->Index = idx;
496 reg->Absolute = 0;
497 reg->Negate = neg;
498 reg->SwizzleX = swiz2tgsi[swiz];
499 reg->SwizzleY = swiz2tgsi[swiz];
500 reg->SwizzleZ = swiz2tgsi[swiz];
501 reg->SwizzleW = swiz2tgsi[swiz];
502 }
503
/* Emit a vector move from 'src' to 'dst', one scalar instruction per
 * written component.  Unwritten components get a nop to keep the
 * 4-slot scheduling cadence.
 */
static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
		struct tgsi_src_register *src)
{
	type_t type_mov = get_ftype(ctx);
	unsigned i;

	for (i = 0; i < 4; i++) {
		/* move to destination: */
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *instr;

			if (src->Absolute || src->Negate) {
				/* can't have abs or neg on a mov instr, so use
				 * absneg.f instead to handle these cases:
				 */
				instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F);
			} else {
				instr = ir3_instr_create(ctx->ir, 1, 0);
				instr->cat1.src_type = type_mov;
				instr->cat1.dst_type = type_mov;
			}

			add_dst_reg(ctx, instr, dst, i);
			add_src_reg(ctx, instr, src, src_swiz(src, i));
		} else {
			add_nop(ctx, 1);
		}
	}
}
534
/* Emit dst = clamp(val, minval, maxval) as a MAX followed by a MIN.
 *
 * NOTE(review): the MIN reads 'val' again rather than the MAX result
 * in 'dst'.  This is only correct when 'val' aliases 'dst' (as in
 * create_clamp_imm(), where the src is built from the dst) — confirm
 * the trans_clamp() path where dst and val can be distinct registers.
 */
static void
create_clamp(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *dst, struct tgsi_src_register *val,
		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
{
	struct ir3_instruction *instr;

	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
	vectorize(ctx, instr, dst, 2, val, 0, minval, 0);

	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
	vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
}
548
/* Clamp 'dst' in place between two immediate values (raw float bits).
 * The value source aliases the dst, so the MAX result feeds the MIN.
 */
static void
create_clamp_imm(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *dst,
		uint32_t minval, uint32_t maxval)
{
	struct tgsi_src_register minconst, maxconst;
	struct tgsi_src_register src;

	src_from_dst(&src, dst);

	get_immediate(ctx, &minconst, minval);
	get_immediate(ctx, &maxconst, maxval);

	create_clamp(ctx, dst, &src, &minconst, &maxconst);
}
564
/* Get the dst register to use for an instruction.  If the dst aliases
 * any src (other than the harmless full-writemask + identity-swizzle
 * case), redirect the write to an internal temporary so the scalarized
 * vector op doesn't clobber a src component before it is read;
 * put_dst() later moves the temp into the real dst.
 */
static struct tgsi_dst_register *
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	unsigned i;
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		struct tgsi_src_register *src = &inst->Src[i].Register;
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
			/* full write + identity swizzle: each component is read
			 * before it is overwritten, so no temp is needed:
			 */
			if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
					(src->SwizzleX == TGSI_SWIZZLE_X) &&
					(src->SwizzleY == TGSI_SWIZZLE_Y) &&
					(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
					(src->SwizzleW == TGSI_SWIZZLE_W))
				continue;
			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
			ctx->tmp_dst.WriteMask = dst->WriteMask;
			dst = &ctx->tmp_dst;
			break;
		}
	}
	return dst;
}
587
588 static void
589 put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
590 struct tgsi_dst_register *dst)
591 {
592 /* if necessary, add mov back into original dst: */
593 if (dst != &inst->Dst[0].Register) {
594 create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
595 }
596 }
597
/* helper to generate the necessary repeat and/or additional instructions
 * to turn a scalar instruction into a vector operation:
 *
 * 'instr' is the pre-created scalar instruction; the varargs are
 * 'nsrcs' pairs of (struct tgsi_src_register *, unsigned flags).
 * A pair with IR3_REG_IMMED in flags passes an integer immediate
 * smuggled through the pointer argument instead of a register.
 */
static void
vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		struct tgsi_dst_register *dst, int nsrcs, ...)
{
	va_list ap;
	int i, j, n = 0;
	bool indirect = dst->Indirect;

	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);

	/* first pass: attach all src registers to the template instr: */
	va_start(ap, nsrcs);
	for (j = 0; j < nsrcs; j++) {
		struct tgsi_src_register *src =
				va_arg(ap, struct tgsi_src_register *);
		unsigned flags = va_arg(ap, unsigned);
		struct ir3_register *reg;
		if (flags & IR3_REG_IMMED) {
			reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
			/* this is an ugly cast.. should have put flags first! */
			reg->iim_val = *(int *)&src;
		} else {
			reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
			indirect |= src->Indirect;
		}
		reg->flags |= flags & ~IR3_REG_NEGATE;
		/* XOR so a caller-requested negate composes with (rather than
		 * overrides) a negate already present on the src:
		 */
		if (flags & IR3_REG_NEGATE)
			reg->flags ^= IR3_REG_NEGATE;
	}
	va_end(ap);

	/* second pass: clone the template for each written component,
	 * fixing up dst/src register components per the writemask and
	 * src swizzles:
	 */
	for (i = 0; i < 4; i++) {
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *cur;

			if (n++ == 0) {
				cur = instr;
			} else {
				cur = ir3_instr_clone(instr);
				/* sync/jump flags only belong on the first instr: */
				cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
			}

			/* fix-up dst register component: */
			cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);

			/* fix-up src register component: */
			va_start(ap, nsrcs);
			for (j = 0; j < nsrcs; j++) {
				struct tgsi_src_register *src =
						va_arg(ap, struct tgsi_src_register *);
				unsigned flags = va_arg(ap, unsigned);
				if (!(flags & IR3_REG_IMMED)) {
					cur->regs[j+1]->num =
							regid(cur->regs[j+1]->num >> 2,
								src_swiz(src, i));
					cur->flags |= src_flags(ctx, cur->regs[j+1]);
				}
			}
			va_end(ap);

			if (indirect)
				ctx->last_rel = cur;
		}
	}

	/* pad w/ nop's.. at least until we are clever enough to
	 * figure out if we really need to..
	 */
	add_nop(ctx, 4 - n);
}
670
671 /*
672 * Handlers for TGSI instructions which do not have a 1:1 mapping to
673 * native instructions:
674 */
675
676 static void
677 trans_clamp(const struct instr_translater *t,
678 struct fd3_compile_context *ctx,
679 struct tgsi_full_instruction *inst)
680 {
681 struct tgsi_dst_register *dst = get_dst(ctx, inst);
682 struct tgsi_src_register *src0 = &inst->Src[0].Register;
683 struct tgsi_src_register *src1 = &inst->Src[1].Register;
684 struct tgsi_src_register *src2 = &inst->Src[2].Register;
685
686 create_clamp(ctx, dst, src0, src1, src2);
687
688 put_dst(ctx, inst, dst);
689 }
690
/* ARL(x) = x, but mova from hrN.x to a0..
 *
 * The address register can only be loaded from a half-reg, so:
 * convert src to s16 in a half temp, shift left by 2 (regs are
 * vec4-addressed), then move into a0.
 */
static void
trans_arl(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	unsigned chan = src->SwizzleX;
	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);

	/* unlock any outstanding relative-addressed instr before a0 is
	 * rewritten:
	 */
	handle_last_rel(ctx);

	tmp_src = get_internal_temp_hr(ctx, &tmp_dst);

	/* cov.{f32,f16}s16 Rtmp, Rsrc */
	instr = ir3_instr_create(ctx->ir, 1, 0);
	instr->cat1.src_type = get_ftype(ctx);
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, src, chan);

	add_nop(ctx, 3);

	/* shl.b Rtmp, Rtmp, 2 */
	instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;

	add_nop(ctx, 3);

	/* mova a0, Rtmp */
	instr = ir3_instr_create(ctx->ir, 1, 0);
	instr->cat1.src_type = TYPE_S16;
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;

	/* need to ensure 5 instr slots before a0 is used: */
	add_nop(ctx, 6);
}
736
737 /* texture fetch/sample instructions: */
738 static void
739 trans_samp(const struct instr_translater *t,
740 struct fd3_compile_context *ctx,
741 struct tgsi_full_instruction *inst)
742 {
743 struct ir3_register *r;
744 struct ir3_instruction *instr;
745 struct tgsi_src_register *coord = &inst->Src[0].Register;
746 struct tgsi_src_register *samp = &inst->Src[1].Register;
747 unsigned tex = inst->Texture.Texture;
748 int8_t *order;
749 unsigned i, flags = 0, src_wrmask;
750 bool needs_mov = false;
751
752 switch (t->arg) {
753 case TGSI_OPCODE_TEX:
754 if (tex == TGSI_TEXTURE_2D) {
755 order = (int8_t[4]){ 0, 1, -1, -1 };
756 src_wrmask = TGSI_WRITEMASK_XY;
757 } else {
758 order = (int8_t[4]){ 0, 1, 2, -1 };
759 src_wrmask = TGSI_WRITEMASK_XYZ;
760 }
761 break;
762 case TGSI_OPCODE_TXP:
763 if (tex == TGSI_TEXTURE_2D) {
764 order = (int8_t[4]){ 0, 1, 3, -1 };
765 src_wrmask = TGSI_WRITEMASK_XYZ;
766 } else {
767 order = (int8_t[4]){ 0, 1, 2, 3 };
768 src_wrmask = TGSI_WRITEMASK_XYZW;
769 }
770 flags |= IR3_INSTR_P;
771 break;
772 default:
773 compile_assert(ctx, 0);
774 break;
775 }
776
777 if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) {
778 add_nop(ctx, 3);
779 flags |= IR3_INSTR_3D;
780 }
781
782 /* cat5 instruction cannot seem to handle const or relative: */
783 if (is_rel_or_const(coord))
784 needs_mov = true;
785
786 /* The texture sample instructions need to coord in successive
787 * registers/components (ie. src.xy but not src.yx). And TXP
788 * needs the .w component in .z for 2D.. so in some cases we
789 * might need to emit some mov instructions to shuffle things
790 * around:
791 */
792 for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
793 if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
794 needs_mov = true;
795
796 if (needs_mov) {
797 struct tgsi_dst_register tmp_dst;
798 struct tgsi_src_register *tmp_src;
799 unsigned j;
800
801 type_t type_mov = get_ftype(ctx);
802
803 /* need to move things around: */
804 tmp_src = get_internal_temp(ctx, &tmp_dst);
805
806 for (j = 0; (j < 4) && (order[j] >= 0); j++) {
807 instr = ir3_instr_create(ctx->ir, 1, 0);
808 instr->cat1.src_type = type_mov;
809 instr->cat1.dst_type = type_mov;
810 add_dst_reg(ctx, instr, &tmp_dst, j);
811 add_src_reg(ctx, instr, coord,
812 src_swiz(coord, order[j]));
813 }
814
815 coord = tmp_src;
816
817 add_nop(ctx, 4 - j);
818 }
819
820 instr = ir3_instr_create(ctx->ir, 5, t->opc);
821 instr->cat5.type = get_ftype(ctx);
822 instr->cat5.samp = samp->Index;
823 instr->cat5.tex = samp->Index;
824 instr->flags |= flags;
825
826 r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
827 r->wrmask = inst->Dst[0].Register.WriteMask;
828
829 add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask;
830
831 /* after add_src_reg() so we don't set (sy) on sam instr itself! */
832 regmask_set(&ctx->needs_sy, r);
833 }
834
835 /*
836 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
837 * cmps.f.eq tmp0, b, a
838 * cov.u16f16 dst, tmp0
839 *
840 * SNE(a,b) = (a != b) ? 1.0 : 0.0
841 * cmps.f.eq tmp0, b, a
842 * add.s tmp0, tmp0, -1
843 * sel.f16 dst, {0.0}, tmp0, {1.0}
844 *
845 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
846 * cmps.f.ge tmp0, a, b
847 * cov.u16f16 dst, tmp0
848 *
849 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
850 * cmps.f.ge tmp0, b, a
851 * cov.u16f16 dst, tmp0
852 *
853 * SGT(a,b) = (a > b) ? 1.0 : 0.0
854 * cmps.f.ge tmp0, b, a
855 * add.s tmp0, tmp0, -1
856 * sel.f16 dst, {0.0}, tmp0, {1.0}
857 *
858 * SLT(a,b) = (a < b) ? 1.0 : 0.0
859 * cmps.f.ge tmp0, a, b
860 * add.s tmp0, tmp0, -1
861 * sel.f16 dst, {0.0}, tmp0, {1.0}
862 *
863 * CMP(a,b,c) = (a < 0.0) ? b : c
864 * cmps.f.ge tmp0, a, {0.0}
865 * add.s tmp0, tmp0, -1
866 * sel.f16 dst, c, tmp0, b
867 */
/* Translate the TGSI comparison/select ops (SEQ/SNE/SGE/SLE/SGT/SLT/
 * CMP) — see the instruction sequences in the comment block above.
 * All are built from cmps.f plus either a u->f conversion (result is
 * already 0/1) or an add.s/sel pair (to invert the condition).
 */
static void
trans_cmp(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_src_register constval0, constval1;
	/* final instruction for CMP() uses orig src1 and src2: */
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *a0, *a1;
	unsigned condition;

	tmp_src = get_internal_temp(ctx, &tmp_dst);

	/* pick operand order + condition so each op maps onto either
	 * "cmps result is the answer" or "invert cmps result":
	 */
	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SNE:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_EQ;
		break;
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLT:
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &inst->Src[1].Register;  /* b */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_SLE:
	case TGSI_OPCODE_SGT:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_CMP:
		get_immediate(ctx, &constval0, fui(0.0));
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &constval0;              /* {0.0} */
		condition = IR3_COND_GE;
		break;
	default:
		compile_assert(ctx, 0);
		return;
	}

	/* cmps can take at most one const operand: */
	if (is_const(a0) && is_const(a1))
		a0 = get_unconst(ctx, a0);

	/* cmps.f.ge tmp, a0, a1 */
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
	instr->cat2.condition = condition;
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);

	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLE:
		/* cov.u16f16 dst, tmp0 */
		instr = ir3_instr_create(ctx->ir, 1, 0);
		instr->cat1.src_type = get_utype(ctx);
		instr->cat1.dst_type = get_ftype(ctx);
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
		break;
	case TGSI_OPCODE_SNE:
	case TGSI_OPCODE_SGT:
	case TGSI_OPCODE_SLT:
	case TGSI_OPCODE_CMP:
		/* add.s tmp, tmp, -1  -- turn 1/0 into 0/-1 for the sel: */
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
		vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);

		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
			/* sel.{f32,f16} dst, src2, tmp, src1 */
			instr = ir3_instr_create(ctx->ir, 3,
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&inst->Src[2].Register, 0,
					tmp_src, 0,
					&inst->Src[1].Register, 0);
		} else {
			get_immediate(ctx, &constval0, fui(0.0));
			get_immediate(ctx, &constval1, fui(1.0));
			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
			instr = ir3_instr_create(ctx->ir, 3,
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&constval0, 0, tmp_src, 0, &constval1, 0);
		}

		break;
	}

	put_dst(ctx, inst, dst);
}
963
964 /*
965 * Conditional / Flow control
966 */
967
968 static unsigned
969 find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
970 {
971 unsigned i;
972 for (i = 0; i < ctx->ir->instrs_count; i++)
973 if (ctx->ir->instrs[i] == instr)
974 return i;
975 return ~0;
976 }
977
978 static void
979 push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
980 {
981 ctx->branch[ctx->branch_count++] = instr;
982 }
983
984 static void
985 pop_branch(struct fd3_compile_context *ctx)
986 {
987 struct ir3_instruction *instr;
988
989 /* if we were clever enough, we'd patch this up after the fact,
990 * and set (jp) flag on whatever the next instruction was, rather
991 * than inserting an extra nop..
992 */
993 instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
994 instr->flags |= IR3_INSTR_JP;
995
996 /* pop the branch instruction from the stack and fix up branch target: */
997 instr = ctx->branch[--ctx->branch_count];
998 instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
999 }
1000
1001 /* We probably don't really want to translate if/else/endif into branches..
1002 * the blob driver evaluates both legs of the if and then uses the sel
1003 * instruction to pick which sides of the branch to "keep".. but figuring
1004 * that out will take somewhat more compiler smarts. So hopefully branches
1005 * don't kill performance too badly.
1006 */
/* IF(a): compare the condition src against 0.0 into the predicate
 * register p0.x, then emit a conditional branch whose target is fixed
 * up later by the matching ELSE/ENDIF (via pop_branch()).
 */
static void
trans_if(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	struct tgsi_src_register constval;

	get_immediate(ctx, &constval, fui(0.0));

	/* cmps can take at most one const operand: */
	if (is_const(src))
		src = get_unconst(ctx, src);

	/* cmps.f.eq p0.x, src, {0.0} */
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
	add_src_reg(ctx, instr, src, src->SwizzleX);
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
	instr->cat2.condition = IR3_COND_EQ;

	instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
	push_branch(ctx, instr);
}
1030
/* ELSE: terminate the IF leg with an unconditional jump over the ELSE
 * leg, resolve the pending IF branch to here, and push the jump so
 * ENDIF can resolve it.
 */
static void
trans_else(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;

	/* for first half of if/else/endif, generate a jump past the else: */
	instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);

	pop_branch(ctx);
	push_branch(ctx, instr);
}
1044
/* ENDIF: resolve the pending branch (from IF or ELSE) to this point. */
static void
trans_endif(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	pop_branch(ctx);
}
1052
1053 /*
1054 * Handlers for TGSI instructions which do have 1:1 mapping to native
1055 * instructions:
1056 */
1057
/* cat0 (flow control / no-operand) ops map 1:1 to a native opcode. */
static void
instr_cat0(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	ir3_instr_create(ctx->ir, 0, t->opc);
}
1065
/* cat1 (mov) ops.  A negated src needs special handling since mov has
 * no negate modifier.
 */
static void
instr_cat1(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src = &inst->Src[0].Register;

	/* mov instructions can't handle a negate on src: */
	if (src->Negate) {
		struct tgsi_src_register constval;
		struct ir3_instruction *instr;

		/* since right now, we are using uniformly either TYPE_F16 or
		 * TYPE_F32, and we don't utilize the conversion possibilities
		 * of mov instructions, we can get away with substituting an
		 * add.f which can handle negate.  Might need to revisit this
		 * in the future if we start supporting widening/narrowing or
		 * conversion to/from integer..
		 */
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
		get_immediate(ctx, &constval, fui(0.0));
		vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
	} else {
		create_mov(ctx, dst, src);
		/* create_mov() generates vector sequence, so no vectorize() */
	}
	put_dst(ctx, inst, dst);
}
1095
1096 static void
1097 instr_cat2(const struct instr_translater *t,
1098 struct fd3_compile_context *ctx,
1099 struct tgsi_full_instruction *inst)
1100 {
1101 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1102 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1103 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1104 struct ir3_instruction *instr;
1105 unsigned src0_flags = 0, src1_flags = 0;
1106
1107 switch (t->tgsi_opc) {
1108 case TGSI_OPCODE_ABS:
1109 src0_flags = IR3_REG_ABS;
1110 break;
1111 case TGSI_OPCODE_SUB:
1112 src1_flags = IR3_REG_NEGATE;
1113 break;
1114 }
1115
1116 switch (t->opc) {
1117 case OPC_ABSNEG_F:
1118 case OPC_ABSNEG_S:
1119 case OPC_CLZ_B:
1120 case OPC_CLZ_S:
1121 case OPC_SIGN_F:
1122 case OPC_FLOOR_F:
1123 case OPC_CEIL_F:
1124 case OPC_RNDNE_F:
1125 case OPC_RNDAZ_F:
1126 case OPC_TRUNC_F:
1127 case OPC_NOT_B:
1128 case OPC_BFREV_B:
1129 case OPC_SETRM:
1130 case OPC_CBITS_B:
1131 /* these only have one src reg */
1132 instr = ir3_instr_create(ctx->ir, 2, t->opc);
1133 vectorize(ctx, instr, dst, 1, src0, src0_flags);
1134 break;
1135 default:
1136 if (is_const(src0) && is_const(src1))
1137 src0 = get_unconst(ctx, src0);
1138
1139 instr = ir3_instr_create(ctx->ir, 2, t->opc);
1140 vectorize(ctx, instr, dst, 2, src0, src0_flags,
1141 src1, src1_flags);
1142 break;
1143 }
1144
1145 put_dst(ctx, inst, dst);
1146 }
1147
1148 static bool is_mad(opc_t opc)
1149 {
1150 switch (opc) {
1151 case OPC_MAD_U16:
1152 case OPC_MADSH_U16:
1153 case OPC_MAD_S16:
1154 case OPC_MADSH_M16:
1155 case OPC_MAD_U24:
1156 case OPC_MAD_S24:
1157 case OPC_MAD_F16:
1158 case OPC_MAD_F32:
1159 return true;
1160 default:
1161 return false;
1162 }
1163 }
1164
1165 static void
1166 instr_cat3(const struct instr_translater *t,
1167 struct fd3_compile_context *ctx,
1168 struct tgsi_full_instruction *inst)
1169 {
1170 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1171 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1172 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1173 struct ir3_instruction *instr;
1174
1175 /* in particular, can't handle const for src1 for cat3..
1176 * for mad, we can swap first two src's if needed:
1177 */
1178 if (is_rel_or_const(src1)) {
1179 if (is_mad(t->opc) && !is_rel_or_const(src0)) {
1180 struct tgsi_src_register *tmp;
1181 tmp = src0;
1182 src0 = src1;
1183 src1 = tmp;
1184 } else {
1185 src1 = get_unconst(ctx, src1);
1186 }
1187 }
1188
1189 instr = ir3_instr_create(ctx->ir, 3,
1190 ctx->so->half_precision ? t->hopc : t->opc);
1191 vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
1192 &inst->Src[2].Register, 0);
1193 put_dst(ctx, inst, dst);
1194 }
1195
1196 static void
1197 instr_cat4(const struct instr_translater *t,
1198 struct fd3_compile_context *ctx,
1199 struct tgsi_full_instruction *inst)
1200 {
1201 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1202 struct tgsi_src_register *src = &inst->Src[0].Register;
1203 struct ir3_instruction *instr;
1204 unsigned i, n;
1205
1206 /* seems like blob compiler avoids const as src.. */
1207 if (is_const(src))
1208 src = get_unconst(ctx, src);
1209
1210 /* worst case: */
1211 add_nop(ctx, 6);
1212
1213 /* we need to replicate into each component: */
1214 for (i = 0, n = 0; i < 4; i++) {
1215 if (dst->WriteMask & (1 << i)) {
1216 if (n++)
1217 ir3_instr_create(ctx->ir, 0, OPC_NOP);
1218 instr = ir3_instr_create(ctx->ir, 4, t->opc);
1219 add_dst_reg(ctx, instr, dst, i);
1220 add_src_reg(ctx, instr, src, src->SwizzleX);
1221 }
1222 }
1223
1224 regmask_set(&ctx->needs_ss, instr->regs[0]);
1225 put_dst(ctx, inst, dst);
1226 }
1227
/* table mapping tgsi opcodes to their translater fxn, plus whatever
 * extra per-opcode data (native opc, half-precision variant, arg)
 * the handler needs:
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }

	/* moves and ALU ops: */
	INSTR(MOV,          instr_cat1),
	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
	/* SUB is ADD with a negate flag on src1 (see instr_cat2): */
	INSTR(SUB,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
	INSTR(CLAMP,        trans_clamp),
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
	INSTR(ROUND,        instr_cat2, .opc = OPC_RNDNE_F),
	INSTR(ARL,          trans_arl),
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
	INSTR(COS,          instr_cat4, .opc = OPC_COS),
	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
	/* texture sampling: */
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
	/* comparisons (handled via cmps/sel sequences): */
	INSTR(SGT,          trans_cmp),
	INSTR(SLT,          trans_cmp),
	INSTR(SGE,          trans_cmp),
	INSTR(SLE,          trans_cmp),
	INSTR(SNE,          trans_cmp),
	INSTR(SEQ,          trans_cmp),
	INSTR(CMP,          trans_cmp),
	/* flow control: */
	INSTR(IF,           trans_if),
	INSTR(ELSE,         trans_else),
	INSTR(ENDIF,        trans_endif),
	INSTR(END,          instr_cat0, .opc = OPC_END),
	INSTR(KILL,         instr_cat0, .opc = OPC_KILL),
};
1267
/* pack a tgsi semantic name+index pair into the driver's compact
 * fd3_semantic encoding (used for VS-output/FS-input linkage):
 */
static fd3_semantic
decl_semantic(const struct tgsi_declaration_semantic *sem)
{
	return fd3_semantic_name(sem->Name, sem->Index);
}
1273
/* handle a TGSI input declaration, recording per-input state on the
 * shader stateobj and (for frag shaders) emitting the bary.f fetch
 * instructions.  Returns the number of nop's the caller should insert
 * before the next ALU instruction (presumably to cover bary.f result
 * latency — TODO confirm against blob output).
 */
static int
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
	unsigned i, flags = 0;
	int nop = 0;

	/* I don't think we should get frag shader input without
	 * semantic info?  Otherwise how do inputs get linked to
	 * vert outputs?
	 */
	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
			decl->Declaration.Semantic);

	if (ctx->so->half_precision)
		flags |= IR3_REG_HALF;

	/* one stateobj entry per register in the declared range: */
	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->inputs_count++;
		unsigned r = regid(i + base, 0);
		unsigned ncomp;

		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
		ncomp = 4;

		DBG("decl in -> r%d", i + base);   // XXX

		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
		so->inputs[n].compmask = (1 << ncomp) - 1;
		so->inputs[n].regid = r;
		/* inloc: position in the interpolated-varying layout,
		 * advanced by component count per input:
		 */
		so->inputs[n].inloc = ctx->next_inloc;
		ctx->next_inloc += ncomp;

		so->total_in += ncomp;

		/* for frag shaders, we need to generate the corresponding bary instr: */
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			unsigned j;

			/* one scalar bary.f per component: */
			for (j = 0; j < ncomp; j++) {
				struct ir3_instruction *instr;
				struct ir3_register *dst;

				instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);

				/* dst register: */
				dst = ir3_reg_create(instr, r + j, flags);
				/* remembered so the final bary.f dst can get the
				 * "end input" (IR3_REG_EI) flag, see
				 * compile_instructions():
				 */
				ctx->last_input = dst;

				/* input position (the -8 offset looks hw/ABI
				 * mandated — NOTE(review): confirm):
				 */
				ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
						so->inputs[n].inloc + j - 8;

				/* input base (always r0.xy): */
				ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3;
			}

			nop = 6;
		}
	}

	return nop;
}
1338
/* handle a TGSI output declaration: validate the semantic against the
 * shader stage, note position writes, and record regid/semantic for
 * each register in the declared range on the shader stateobj.
 */
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct fd3_shader_stateobj *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
	unsigned comp = 0;
	unsigned name = decl->Semantic.Name;
	unsigned i;

	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?

	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX

	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			so->writes_pos = true;
			/* fallthrough */
		case TGSI_SEMANTIC_PSIZE:
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_GENERIC:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
			break;
		default:
			compile_error(ctx, "unknown VS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	} else {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			comp = 2;  /* tgsi will write to .z component */
			so->writes_pos = true;
			/* fallthrough */
		case TGSI_SEMANTIC_COLOR:
			break;
		default:
			compile_error(ctx, "unknown FS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	}

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->outputs_count++;
		so->outputs[n].semantic = decl_semantic(&decl->Semantic);
		so->outputs[n].regid = regid(i + base, comp);
	}
}
1387
1388 static void
1389 decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
1390 {
1391 ctx->so->samplers_count++;
1392 }
1393
/* main translation loop: walk the TGSI token stream, dispatching
 * declarations, immediates, and instructions, then apply the
 * end-of-shader fixups (sync flags on first instr, end-input flag,
 * pending relative-addressing cleanup).
 */
static void
compile_instructions(struct fd3_compile_context *ctx)
{
	struct ir3_shader *ir = ctx->ir;
	/* nop's requested by the most recent input declaration (bary.f
	 * latency), inserted before the next real instruction:
	 */
	int nop = 0;

	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				nop = decl_in(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
				decl_samp(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			/* copy all four 32-bit immediate components: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			/* flush nop's owed from a preceding input decl: */
			add_nop(ctx, nop);
			nop = 0;

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				/* temps allocated via get_unconst() etc. are only
				 * valid within a single instruction translation:
				 */
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* apply saturate modifier as an explicit clamp: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			break;
		}
		default:
			break;
		}
	}

	/* first instruction waits on all outstanding loads/stores: */
	if (ir->instrs_count > 0)
		ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;

	/* mark the last bary.f dst as "end input": */
	if (ctx->last_input)
		ctx->last_input->flags |= IR3_REG_EI;

	handle_last_rel(ctx);
}
1470
1471 int
1472 fd3_compile_shader(struct fd3_shader_stateobj *so,
1473 const struct tgsi_token *tokens)
1474 {
1475 struct fd3_compile_context ctx;
1476
1477 assert(!so->ir);
1478
1479 so->ir = ir3_shader_create();
1480
1481 assert(so->ir);
1482
1483 if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
1484 return -1;
1485
1486 compile_instructions(&ctx);
1487
1488 compile_free(&ctx);
1489
1490 return 0;
1491 }