freedreno/ir3: avoid scheduler deadlock
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_compiler_old.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include <stdarg.h>
30
31 #include "pipe/p_state.h"
32 #include "util/u_string.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "tgsi/tgsi_lowering.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_ureg.h"
38 #include "tgsi/tgsi_info.h"
39 #include "tgsi/tgsi_strings.h"
40 #include "tgsi/tgsi_dump.h"
41 #include "tgsi/tgsi_scan.h"
42
43 #include "freedreno_util.h"
44
45 #include "ir3_compiler.h"
46 #include "ir3_shader.h"
47
48 #include "instr-a3xx.h"
49 #include "ir3.h"
50
51
/* Per-shader-variant state tracked while translating TGSI to ir3. */
struct ir3_compile_context {
	const struct tgsi_token *tokens;
	bool free_tokens;     /* true if 'tokens' came from lowering and must be free()'d */
	struct ir3 *ir;
	struct ir3_block *block;   /* single block all instructions are emitted into */
	struct ir3_shader_variant *so;

	struct tgsi_parse_context parser;
	unsigned type;        /* processor type, from the TGSI header */

	struct tgsi_shader_info info;

	/* last input dst (for setting (ei) flag): */
	struct ir3_register *last_input;

	/* last instruction with relative addressing: */
	struct ir3_instruction *last_rel;

	/* for calculating input/output positions/linkages: */
	unsigned next_inloc;

	/* scratch temporaries handed out for multi-instruction sequences: */
	unsigned num_internal_temps;
	struct tgsi_src_register internal_temps[6];

	/* track registers which need to synchronize w/ "complex alu" cat3
	 * instruction pipeline:
	 */
	regmask_t needs_ss;

	/* track registers which need to synchronize with texture fetch
	 * pipeline:
	 */
	regmask_t needs_sy;

	/* inputs start at r0, outputs start after the last input, and
	 * temporaries start after the last output (see compile_init()).
	 *
	 * We could be more clever, because this is not a hw restriction,
	 * but probably best just to implement an optimizing pass to
	 * reduce the # of registers used and get rid of redundant mov's
	 * (to output register).
	 */
	unsigned base_reg[TGSI_FILE_COUNT];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	/* stack of branch instructions that start (potentially nested)
	 * branches, so that we can fix up the branch target on the
	 * corresponding END instruction
	 */
	struct ir3_instruction *branch[16];
	unsigned int branch_count;

	/* used when dst is same as one of the src, to avoid overwriting a
	 * src element before the remaining scalar instructions that make
	 * up the vector operation
	 */
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
};
114
115
116 static void vectorize(struct ir3_compile_context *ctx,
117 struct ir3_instruction *instr, struct tgsi_dst_register *dst,
118 int nsrcs, ...);
119 static void create_mov(struct ir3_compile_context *ctx,
120 struct tgsi_dst_register *dst, struct tgsi_src_register *src);
121
/* Initialize per-variant compile state: run TGSI lowering passes, lay
 * out the flat register file (inputs, then outputs, then temporaries;
 * immediates after user constants), and set up the token parser.
 *
 * Returns TGSI_PARSE_OK on success, else the tgsi_parse_init() error.
 */
static unsigned
compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
		const struct tgsi_token *tokens)
{
	unsigned ret, base = 0;
	struct tgsi_shader_info *info = &ctx->info;
	/* opcodes this backend does not implement natively are lowered: */
	struct tgsi_lowering_config lconfig = {
			.color_two_side = so->key.color_two_side,
			.lower_DST = true,
			.lower_XPD = true,
			.lower_SCS = true,
			.lower_LRP = true,
			.lower_FRC = true,
			.lower_POW = true,
			.lower_LIT = true,
			.lower_EXP = true,
			.lower_LOG = true,
			.lower_DP4 = true,
			.lower_DP3 = true,
			.lower_DPH = true,
			.lower_DP2 = true,
			.lower_DP2A = true,
	};

	/* texture-coordinate saturate flags come from the stage-specific
	 * part of the shader key:
	 */
	switch (so->type) {
	case SHADER_FRAGMENT:
	case SHADER_COMPUTE:
		lconfig.saturate_s = so->key.fsaturate_s;
		lconfig.saturate_t = so->key.fsaturate_t;
		lconfig.saturate_r = so->key.fsaturate_r;
		break;
	case SHADER_VERTEX:
		lconfig.saturate_s = so->key.vsaturate_s;
		lconfig.saturate_t = so->key.vsaturate_t;
		lconfig.saturate_r = so->key.vsaturate_r;
		break;
	}

	/* lowering returns NULL when nothing needed lowering; in that case
	 * keep (and later don't free) the caller's token stream:
	 */
	ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
	ctx->free_tokens = !!ctx->tokens;
	if (!ctx->tokens) {
		/* no lowering */
		ctx->tokens = tokens;
	}
	ctx->ir = so->ir;
	ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
	ctx->so = so;
	ctx->last_input = NULL;
	ctx->last_rel = NULL;
	ctx->next_inloc = 8;
	ctx->num_internal_temps = 0;
	ctx->branch_count = 0;

	regmask_init(&ctx->needs_ss);
	regmask_init(&ctx->needs_sy);
	memset(ctx->base_reg, 0, sizeof(ctx->base_reg));

	/* Immediates go after constants: */
	ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
			info->file_max[TGSI_FILE_CONSTANT] + 1;

	/* if full precision and fragment shader, don't clobber
	 * r0.x w/ bary fetch:
	 */
	if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision)
		base = 1;

	/* Temporaries after outputs after inputs: */
	ctx->base_reg[TGSI_FILE_INPUT] = base;
	ctx->base_reg[TGSI_FILE_OUTPUT] = base +
			info->file_max[TGSI_FILE_INPUT] + 1;
	ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
			info->file_max[TGSI_FILE_INPUT] + 1 +
			info->file_max[TGSI_FILE_OUTPUT] + 1;

	so->first_driver_param = ~0;
	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
	/* compiler-generated immediates are appended after the shader's own: */
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);

	ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
	if (ret != TGSI_PARSE_OK)
		return ret;

	ctx->type = ctx->parser.FullHeader.Processor.Processor;

	return ret;
}
210
/* Report a fatal translation problem: print the formatted message,
 * dump the TGSI being compiled (to aid debugging), and assert in
 * debug builds.
 */
static void
compile_error(struct ir3_compile_context *ctx, const char *format, ...)
{
	va_list ap;
	va_start(ap, format);
	_debug_vprintf(format, ap);
	va_end(ap);
	tgsi_dump(ctx->tokens, 0);
	debug_assert(0);
}
221
/* like assert(), but also dumps the TGSI shader via compile_error(): */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
	} while (0)
225
/* Release resources acquired in compile_init(): the lowered token
 * stream (only if lowering allocated one) and the parser state.
 */
static void
compile_free(struct ir3_compile_context *ctx)
{
	if (ctx->free_tokens)
		free((void *)ctx->tokens);
	tgsi_parse_free(&ctx->parser);
}
233
/* Table entry mapping a TGSI opcode to its emit handler, plus the
 * native opcode(s) and extra argument the handler should use.
 */
struct instr_translater {
	void (*fxn)(const struct instr_translater *t,
			struct ir3_compile_context *ctx,
			struct tgsi_full_instruction *inst);
	unsigned tgsi_opc;   /* TGSI_OPCODE_* this entry handles */
	opc_t opc;           /* native opcode */
	opc_t hopc;          /* opc to use for half_precision mode, if different */
	unsigned arg;        /* handler-specific extra argument */
};
243
244 static void
245 handle_last_rel(struct ir3_compile_context *ctx)
246 {
247 if (ctx->last_rel) {
248 ctx->last_rel->flags |= IR3_INSTR_UL;
249 ctx->last_rel = NULL;
250 }
251 }
252
/* Append a new instruction of the given category/opcode to the
 * current block.
 */
static struct ir3_instruction *
instr_create(struct ir3_compile_context *ctx, int category, opc_t opc)
{
	return ir3_instr_create(ctx->block, category, opc);
}
258
259 static void
260 add_nop(struct ir3_compile_context *ctx, unsigned count)
261 {
262 while (count-- > 0)
263 instr_create(ctx, 0, OPC_NOP);
264 }
265
/* Compute the (ss)/(sy) sync flags needed when reading 'reg', based on
 * which registers have pending writes from the "complex alu" (cat3) or
 * texture-fetch pipelines.  Once a sync flag is produced the whole
 * pending mask is reset.
 */
static unsigned
src_flags(struct ir3_compile_context *ctx, struct ir3_register *reg)
{
	unsigned flags = 0;

	/* const/immediate sources never need synchronization: */
	if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
		return flags;

	if (regmask_get(&ctx->needs_ss, reg)) {
		flags |= IR3_INSTR_SS;
		regmask_init(&ctx->needs_ss);
	}

	if (regmask_get(&ctx->needs_sy, reg)) {
		flags |= IR3_INSTR_SY;
		regmask_init(&ctx->needs_sy);
	}

	return flags;
}
286
/* Add the dst register to 'instr', translating a TGSI dst (file +
 * index + component) into a flat ir3 register number using the
 * per-file base offsets computed in compile_init().
 */
static struct ir3_register *
add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_dst_register *dst, unsigned chan)
{
	unsigned flags = 0, num = 0;
	struct ir3_register *reg;

	switch (dst->File) {
	case TGSI_FILE_OUTPUT:
	case TGSI_FILE_TEMPORARY:
		num = dst->Index + ctx->base_reg[dst->File];
		break;
	case TGSI_FILE_ADDRESS:
		num = REG_A0;
		break;
	default:
		compile_error(ctx, "unsupported dst register file: %s\n",
			tgsi_file_name(dst->File));
		break;
	}

	if (dst->Indirect)
		flags |= IR3_REG_RELATIV;
	if (ctx->so->key.half_precision)
		flags |= IR3_REG_HALF;

	reg = ir3_reg_create(instr, regid(num, chan), flags);

	/* remember the most recent relative access so handle_last_rel()
	 * can set the (ul) flag on it later:
	 */
	if (dst->Indirect)
		ctx->last_rel = instr;

	return reg;
}
320
/* Add a src register to 'instr', translating a TGSI src (file + index
 * + component) into a flat ir3 register number, and folding in any
 * abs/neg/relative/half flags plus required (ss)/(sy) sync flags.
 */
static struct ir3_register *
add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
		const struct tgsi_src_register *src, unsigned chan)
{
	unsigned flags = 0, num = 0;
	struct ir3_register *reg;

	/* TODO we need to use a mov to temp for const >= 64.. or maybe
	 * we could use relative addressing..
	 */
	compile_assert(ctx, src->Index < 64);

	switch (src->File) {
	case TGSI_FILE_IMMEDIATE:
		/* TODO if possible, use actual immediate instead of const.. but
		 * TGSI has vec4 immediates, we can only embed scalar (of limited
		 * size, depending on instruction..)
		 */
		/* fallthrough */
	case TGSI_FILE_CONSTANT:
		flags |= IR3_REG_CONST;
		num = src->Index + ctx->base_reg[src->File];
		break;
	case TGSI_FILE_OUTPUT:
		/* NOTE: we should only end up w/ OUTPUT file for things like
		 * clamp()'ing saturated dst instructions
		 */
		/* fallthrough */
	case TGSI_FILE_INPUT:
	case TGSI_FILE_TEMPORARY:
		num = src->Index + ctx->base_reg[src->File];
		break;
	default:
		compile_error(ctx, "unsupported src register file: %s\n",
			tgsi_file_name(src->File));
		break;
	}

	if (src->Absolute)
		flags |= IR3_REG_ABS;
	if (src->Negate)
		flags |= IR3_REG_NEGATE;
	if (src->Indirect)
		flags |= IR3_REG_RELATIV;
	if (ctx->so->key.half_precision)
		flags |= IR3_REG_HALF;

	reg = ir3_reg_create(instr, regid(num, chan), flags);

	/* remember the most recent relative access for handle_last_rel(): */
	if (src->Indirect)
		ctx->last_rel = instr;

	instr->flags |= src_flags(ctx, reg);

	return reg;
}
375
376 static void
377 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
378 {
379 src->File = dst->File;
380 src->Indirect = dst->Indirect;
381 src->Dimension = dst->Dimension;
382 src->Index = dst->Index;
383 src->Absolute = 0;
384 src->Negate = 0;
385 src->SwizzleX = TGSI_SWIZZLE_X;
386 src->SwizzleY = TGSI_SWIZZLE_Y;
387 src->SwizzleZ = TGSI_SWIZZLE_Z;
388 src->SwizzleW = TGSI_SWIZZLE_W;
389 }
390
/* Get internal-temp src/dst to use for a sequence of instructions
 * generated by a single TGSI op.  The temp is allocated past the
 * shader's own TEMPORARY registers.
 */
static struct tgsi_src_register *
get_internal_temp(struct ir3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	tmp_dst->File = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* place it just after the highest TEMPORARY the shader declares: */
	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
417
/* Get internal half-precision temp src/dst to use for a sequence of
 * instructions generated by a single TGSI op.  (In half_precision
 * mode everything is already half, so a normal temp is used.)
 */
static struct tgsi_src_register *
get_internal_temp_hr(struct ir3_compile_context *ctx,
		struct tgsi_dst_register *tmp_dst)
{
	struct tgsi_src_register *tmp_src;
	int n;

	if (ctx->so->key.half_precision)
		return get_internal_temp(ctx, tmp_dst);

	tmp_dst->File = TGSI_FILE_TEMPORARY;
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
	tmp_dst->Indirect = 0;
	tmp_dst->Dimension = 0;

	/* assign next temporary: */
	n = ctx->num_internal_temps++;
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
	tmp_src = &ctx->internal_temps[n];

	/* just use hr0 because no one else should be using half-
	 * precision regs:
	 */
	tmp_dst->Index = 0;

	src_from_dst(tmp_src, tmp_dst);

	return tmp_src;
}
450
451 static inline bool
452 is_const(struct tgsi_src_register *src)
453 {
454 return (src->File == TGSI_FILE_CONSTANT) ||
455 (src->File == TGSI_FILE_IMMEDIATE);
456 }
457
458 static inline bool
459 is_relative(struct tgsi_src_register *src)
460 {
461 return src->Indirect;
462 }
463
/* true if the src is either relative-addressed or from the const file: */
static inline bool
is_rel_or_const(struct tgsi_src_register *src)
{
	if (is_relative(src))
		return true;
	return is_const(src);
}
469
470 static type_t
471 get_ftype(struct ir3_compile_context *ctx)
472 {
473 return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32;
474 }
475
476 static type_t
477 get_utype(struct ir3_compile_context *ctx)
478 {
479 return ctx->so->key.half_precision ? TYPE_U16 : TYPE_U32;
480 }
481
482 static unsigned
483 src_swiz(struct tgsi_src_register *src, int chan)
484 {
485 switch (chan) {
486 case 0: return src->SwizzleX;
487 case 1: return src->SwizzleY;
488 case 2: return src->SwizzleZ;
489 case 3: return src->SwizzleW;
490 }
491 assert(0);
492 return 0;
493 }
494
/* for instructions that cannot take a const register as src, if needed
 * generate a move to temporary gpr:
 */
static struct tgsi_src_register *
get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src)
{
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;

	/* only meaningful for const/immediate/relative sources: */
	compile_assert(ctx, is_rel_or_const(src));

	tmp_src = get_internal_temp(ctx, &tmp_dst);

	create_mov(ctx, &tmp_dst, src);

	return tmp_src;
}
512
/* Build a src register referencing the immediate value 'val',
 * reusing an already-emitted immediate slot when the same value (or
 * its negation, via the Negate modifier) is found; otherwise a new
 * slot is appended and recorded in the shader variant.
 */
static void
get_immediate(struct ir3_compile_context *ctx,
		struct tgsi_src_register *reg, uint32_t val)
{
	unsigned neg, swiz, idx, i;
	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
	static const unsigned swiz2tgsi[] = {
			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
	};

	/* scan existing immediates for a match (immediates are packed four
	 * scalars per vec4 slot):
	 */
	for (i = 0; i < ctx->immediate_idx; i++) {
		swiz = i % 4;
		idx = i / 4;

		if (ctx->so->immediates[idx].val[swiz] == val) {
			neg = 0;
			break;
		}

		if (ctx->so->immediates[idx].val[swiz] == -val) {
			neg = 1;
			break;
		}
	}

	if (i == ctx->immediate_idx) {
		/* need to generate a new immediate: */
		swiz = i % 4;
		idx = i / 4;
		neg = 0;
		ctx->so->immediates[idx].val[swiz] = val;
		ctx->so->immediates_count = idx + 1;
		ctx->immediate_idx++;
	}

	/* broadcast the chosen scalar across all four channels: */
	reg->File = TGSI_FILE_IMMEDIATE;
	reg->Indirect = 0;
	reg->Dimension = 0;
	reg->Index = idx;
	reg->Absolute = 0;
	reg->Negate = neg;
	reg->SwizzleX = swiz2tgsi[swiz];
	reg->SwizzleY = swiz2tgsi[swiz];
	reg->SwizzleZ = swiz2tgsi[swiz];
	reg->SwizzleW = swiz2tgsi[swiz];
}
559
/* Emit a scalar mov per enabled dst channel (nop for disabled channels
 * to keep the 4-slot sequence aligned).  abs/neg modifiers require
 * absneg.f in place of a plain mov.
 */
static void
create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst,
		struct tgsi_src_register *src)
{
	type_t type_mov = get_ftype(ctx);
	unsigned i;

	for (i = 0; i < 4; i++) {
		/* move to destination: */
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *instr;

			if (src->Absolute || src->Negate) {
				/* can't have abs or neg on a mov instr, so use
				 * absneg.f instead to handle these cases:
				 */
				instr = instr_create(ctx, 2, OPC_ABSNEG_F);
			} else {
				instr = instr_create(ctx, 1, 0);
				instr->cat1.src_type = type_mov;
				instr->cat1.dst_type = type_mov;
			}

			add_dst_reg(ctx, instr, dst, i);
			add_src_reg(ctx, instr, src, src_swiz(src, i));
		} else {
			add_nop(ctx, 1);
		}
	}
}
590
/* dst = clamp(val, minval, maxval), emitted as max.f followed by min.f.
 *
 * NOTE(review): the second instruction reads 'val' rather than 'dst',
 * so this is only correct when 'val' aliases 'dst' (as it does for
 * create_clamp_imm(), which builds 'val' via src_from_dst()) — verify
 * for any other caller.
 */
static void
create_clamp(struct ir3_compile_context *ctx,
		struct tgsi_dst_register *dst, struct tgsi_src_register *val,
		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
{
	struct ir3_instruction *instr;

	/* max.f dst, val, minval */
	instr = instr_create(ctx, 2, OPC_MAX_F);
	vectorize(ctx, instr, dst, 2, val, 0, minval, 0);

	/* min.f dst, val, maxval */
	instr = instr_create(ctx, 2, OPC_MIN_F);
	vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
}
604
/* Clamp 'dst' in place between two immediate (bit-pattern) bounds, by
 * building a src alias of dst and two immediate srcs.
 */
static void
create_clamp_imm(struct ir3_compile_context *ctx,
		struct tgsi_dst_register *dst,
		uint32_t minval, uint32_t maxval)
{
	struct tgsi_src_register minconst, maxconst;
	struct tgsi_src_register src;

	src_from_dst(&src, dst);

	get_immediate(ctx, &minconst, minval);
	get_immediate(ctx, &maxconst, maxval);

	create_clamp(ctx, dst, &src, &minconst, &maxconst);
}
620
/* Pick the dst register for an instruction.  If the dst overlaps any
 * src with a non-identity access pattern, substitute an internal temp
 * so the scalarized sequence doesn't clobber a src channel it still
 * needs to read; put_dst() later movs the temp back.
 */
static struct tgsi_dst_register *
get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	unsigned i;
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		struct tgsi_src_register *src = &inst->Src[i].Register;
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
			/* full writemask + identity swizzle is safe, since each
			 * scalar op then reads the channel it writes:
			 */
			if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
					(src->SwizzleX == TGSI_SWIZZLE_X) &&
					(src->SwizzleY == TGSI_SWIZZLE_Y) &&
					(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
					(src->SwizzleW == TGSI_SWIZZLE_W))
				continue;
			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
			ctx->tmp_dst.WriteMask = dst->WriteMask;
			dst = &ctx->tmp_dst;
			break;
		}
	}
	return dst;
}
643
644 static void
645 put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst,
646 struct tgsi_dst_register *dst)
647 {
648 /* if necessary, add mov back into original dst: */
649 if (dst != &inst->Dst[0].Register) {
650 create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
651 }
652 }
653
/* helper to generate the necessary repeat and/or additional instructions
 * to turn a scalar instruction into a vector operation:
 *
 * Varargs are (src, flags) pairs, nsrcs of them.  A pair with
 * IR3_REG_IMMED in flags carries an immediate value smuggled through
 * the pointer argument instead of a real src register.  The first
 * enabled dst channel reuses 'instr'; each further channel clones it
 * and rewrites the dst/src components for that channel.
 */
static void
vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
		struct tgsi_dst_register *dst, int nsrcs, ...)
{
	va_list ap;
	int i, j, n = 0;
	bool indirect = dst->Indirect;

	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);

	/* first pass: attach all srcs (as channel x) to the template instr: */
	va_start(ap, nsrcs);
	for (j = 0; j < nsrcs; j++) {
		struct tgsi_src_register *src =
				va_arg(ap, struct tgsi_src_register *);
		unsigned flags = va_arg(ap, unsigned);
		struct ir3_register *reg;
		if (flags & IR3_REG_IMMED) {
			reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
			/* this is an ugly cast.. should have put flags first! */
			reg->iim_val = *(int *)&src;
		} else {
			reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
			indirect |= src->Indirect;
		}
		reg->flags |= flags & ~IR3_REG_NEGATE;
		/* NEGATE toggles rather than sets, so an already-negated src
		 * combined with a negate flag cancels out:
		 */
		if (flags & IR3_REG_NEGATE)
			reg->flags ^= IR3_REG_NEGATE;
	}
	va_end(ap);

	/* second pass: one instruction per enabled dst channel: */
	for (i = 0; i < 4; i++) {
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *cur;

			if (n++ == 0) {
				cur = instr;
			} else {
				/* clones must not repeat sync/jump-target flags: */
				cur = ir3_instr_clone(instr);
				cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
			}

			/* fix-up dst register component: */
			cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);

			/* fix-up src register component: */
			va_start(ap, nsrcs);
			for (j = 0; j < nsrcs; j++) {
				struct tgsi_src_register *src =
						va_arg(ap, struct tgsi_src_register *);
				unsigned flags = va_arg(ap, unsigned);
				if (!(flags & IR3_REG_IMMED)) {
					cur->regs[j+1]->num =
							regid(cur->regs[j+1]->num >> 2,
									src_swiz(src, i));
					cur->flags |= src_flags(ctx, cur->regs[j+1]);
				}
			}
			va_end(ap);

			if (indirect)
				ctx->last_rel = cur;
		}
	}

	/* pad w/ nop's.. at least until we are clever enough to
	 * figure out if we really need to..
	 */
	add_nop(ctx, 4 - n);
}
726
727 /*
728 * Handlers for TGSI instructions which do not have a 1:1 mapping to
729 * native instructions:
730 */
731
732 static void
733 trans_clamp(const struct instr_translater *t,
734 struct ir3_compile_context *ctx,
735 struct tgsi_full_instruction *inst)
736 {
737 struct tgsi_dst_register *dst = get_dst(ctx, inst);
738 struct tgsi_src_register *src0 = &inst->Src[0].Register;
739 struct tgsi_src_register *src1 = &inst->Src[1].Register;
740 struct tgsi_src_register *src2 = &inst->Src[2].Register;
741
742 create_clamp(ctx, dst, src0, src1, src2);
743
744 put_dst(ctx, inst, dst);
745 }
746
/* ARL(x) = x, but mova from hrN.x to a0..
 *
 * Emitted as: convert float -> s16 in a half-precision temp, shift
 * left by 2 (presumably scaling the index to a vec4 stride — confirm
 * against how a0 consumers use it), then mov into a0.
 */
static void
trans_arl(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	unsigned chan = src->SwizzleX;
	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);

	/* a0 is being rewritten, so close out any pending relative access: */
	handle_last_rel(ctx);

	tmp_src = get_internal_temp_hr(ctx, &tmp_dst);

	/* cov.{f32,f16}s16 Rtmp, Rsrc */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = get_ftype(ctx);
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, src, chan);

	add_nop(ctx, 3);

	/* shl.b Rtmp, Rtmp, 2 */
	instr = instr_create(ctx, 2, OPC_SHL_B);
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;

	add_nop(ctx, 3);

	/* mova a0, Rtmp */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = TYPE_S16;
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;

	/* need to ensure 5 instr slots before a0 is used: */
	add_nop(ctx, 6);
}
792
793 /* texture fetch/sample instructions: */
794 static void
795 trans_samp(const struct instr_translater *t,
796 struct ir3_compile_context *ctx,
797 struct tgsi_full_instruction *inst)
798 {
799 struct ir3_register *r;
800 struct ir3_instruction *instr;
801 struct tgsi_src_register *coord = &inst->Src[0].Register;
802 struct tgsi_src_register *samp = &inst->Src[1].Register;
803 unsigned tex = inst->Texture.Texture;
804 int8_t *order;
805 unsigned i, flags = 0, src_wrmask;
806 bool needs_mov = false;
807
808 switch (t->arg) {
809 case TGSI_OPCODE_TEX:
810 if (tex == TGSI_TEXTURE_2D) {
811 order = (int8_t[4]){ 0, 1, -1, -1 };
812 src_wrmask = TGSI_WRITEMASK_XY;
813 } else {
814 order = (int8_t[4]){ 0, 1, 2, -1 };
815 src_wrmask = TGSI_WRITEMASK_XYZ;
816 }
817 break;
818 case TGSI_OPCODE_TXP:
819 if (tex == TGSI_TEXTURE_2D) {
820 order = (int8_t[4]){ 0, 1, 3, -1 };
821 src_wrmask = TGSI_WRITEMASK_XYZ;
822 } else {
823 order = (int8_t[4]){ 0, 1, 2, 3 };
824 src_wrmask = TGSI_WRITEMASK_XYZW;
825 }
826 flags |= IR3_INSTR_P;
827 break;
828 default:
829 compile_assert(ctx, 0);
830 break;
831 }
832
833 if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) {
834 add_nop(ctx, 3);
835 flags |= IR3_INSTR_3D;
836 }
837
838 /* cat5 instruction cannot seem to handle const or relative: */
839 if (is_rel_or_const(coord))
840 needs_mov = true;
841
842 /* The texture sample instructions need to coord in successive
843 * registers/components (ie. src.xy but not src.yx). And TXP
844 * needs the .w component in .z for 2D.. so in some cases we
845 * might need to emit some mov instructions to shuffle things
846 * around:
847 */
848 for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
849 if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
850 needs_mov = true;
851
852 if (needs_mov) {
853 struct tgsi_dst_register tmp_dst;
854 struct tgsi_src_register *tmp_src;
855 unsigned j;
856
857 type_t type_mov = get_ftype(ctx);
858
859 /* need to move things around: */
860 tmp_src = get_internal_temp(ctx, &tmp_dst);
861
862 for (j = 0; (j < 4) && (order[j] >= 0); j++) {
863 instr = instr_create(ctx, 1, 0);
864 instr->cat1.src_type = type_mov;
865 instr->cat1.dst_type = type_mov;
866 add_dst_reg(ctx, instr, &tmp_dst, j);
867 add_src_reg(ctx, instr, coord,
868 src_swiz(coord, order[j]));
869 }
870
871 coord = tmp_src;
872
873 add_nop(ctx, 4 - j);
874 }
875
876 instr = instr_create(ctx, 5, t->opc);
877 instr->cat5.type = get_ftype(ctx);
878 instr->cat5.samp = samp->Index;
879 instr->cat5.tex = samp->Index;
880 instr->flags |= flags;
881
882 r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
883 r->wrmask = inst->Dst[0].Register.WriteMask;
884
885 add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask;
886
887 /* after add_src_reg() so we don't set (sy) on sam instr itself! */
888 regmask_set(&ctx->needs_sy, r);
889 }
890
891 /*
892 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
893 * cmps.f.eq tmp0, b, a
894 * cov.u16f16 dst, tmp0
895 *
896 * SNE(a,b) = (a != b) ? 1.0 : 0.0
897 * cmps.f.eq tmp0, b, a
898 * add.s tmp0, tmp0, -1
899 * sel.f16 dst, {0.0}, tmp0, {1.0}
900 *
901 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
902 * cmps.f.ge tmp0, a, b
903 * cov.u16f16 dst, tmp0
904 *
905 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
906 * cmps.f.ge tmp0, b, a
907 * cov.u16f16 dst, tmp0
908 *
909 * SGT(a,b) = (a > b) ? 1.0 : 0.0
910 * cmps.f.ge tmp0, b, a
911 * add.s tmp0, tmp0, -1
912 * sel.f16 dst, {0.0}, tmp0, {1.0}
913 *
914 * SLT(a,b) = (a < b) ? 1.0 : 0.0
915 * cmps.f.ge tmp0, a, b
916 * add.s tmp0, tmp0, -1
917 * sel.f16 dst, {0.0}, tmp0, {1.0}
918 *
919 * CMP(a,b,c) = (a < 0.0) ? b : c
920 * cmps.f.ge tmp0, a, {0.0}
921 * add.s tmp0, tmp0, -1
922 * sel.f16 dst, c, tmp0, b
923 */
/* Translate SEQ/SNE/SGE/SLE/SGT/SLT/CMP using cmps.f plus either a
 * uint->float convert (for the "true => 1.0 via compare result" cases)
 * or an add.s/sel pair (see the expansion comment above).
 */
static void
trans_cmp(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_src_register constval0, constval1;
	/* final instruction for CMP() uses orig src1 and src2: */
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *a0, *a1;
	unsigned condition;

	tmp_src = get_internal_temp(ctx, &tmp_dst);

	/* choose compare operand order and condition per opcode
	 * (SLE/SGT swap operands so a single GE compare suffices):
	 */
	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SNE:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_EQ;
		break;
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLT:
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &inst->Src[1].Register;  /* b */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_SLE:
	case TGSI_OPCODE_SGT:
		a0 = &inst->Src[1].Register;  /* b */
		a1 = &inst->Src[0].Register;  /* a */
		condition = IR3_COND_GE;
		break;
	case TGSI_OPCODE_CMP:
		get_immediate(ctx, &constval0, fui(0.0));
		a0 = &inst->Src[0].Register;  /* a */
		a1 = &constval0;              /* {0.0} */
		condition = IR3_COND_GE;
		break;
	default:
		compile_assert(ctx, 0);
		return;
	}

	/* cmps.f can't take two const srcs; copy one to a gpr: */
	if (is_const(a0) && is_const(a1))
		a0 = get_unconst(ctx, a0);

	/* cmps.f.ge tmp, a0, a1 */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	instr->cat2.condition = condition;
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);

	switch (t->tgsi_opc) {
	case TGSI_OPCODE_SEQ:
	case TGSI_OPCODE_SGE:
	case TGSI_OPCODE_SLE:
		/* cov.u16f16 dst, tmp0 */
		instr = instr_create(ctx, 1, 0);
		instr->cat1.src_type = get_utype(ctx);
		instr->cat1.dst_type = get_ftype(ctx);
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
		break;
	case TGSI_OPCODE_SNE:
	case TGSI_OPCODE_SGT:
	case TGSI_OPCODE_SLT:
	case TGSI_OPCODE_CMP:
		/* add.s tmp, tmp, -1 */
		instr = instr_create(ctx, 2, OPC_ADD_S);
		vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);

		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
			/* sel.{f32,f16} dst, src2, tmp, src1 */
			instr = instr_create(ctx, 3,
					ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&inst->Src[2].Register, 0,
					tmp_src, 0,
					&inst->Src[1].Register, 0);
		} else {
			get_immediate(ctx, &constval0, fui(0.0));
			get_immediate(ctx, &constval1, fui(1.0));
			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
			instr = instr_create(ctx, 3,
					ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
			vectorize(ctx, instr, dst, 3,
					&constval0, 0, tmp_src, 0, &constval1, 0);
		}

		break;
	}

	put_dst(ctx, inst, dst);
}
1019
1020 /*
1021 * Conditional / Flow control
1022 */
1023
1024 static unsigned
1025 find_instruction(struct ir3_compile_context *ctx, struct ir3_instruction *instr)
1026 {
1027 unsigned i;
1028 for (i = 0; i < ctx->ir->instrs_count; i++)
1029 if (ctx->ir->instrs[i] == instr)
1030 return i;
1031 return ~0;
1032 }
1033
1034 static void
1035 push_branch(struct ir3_compile_context *ctx, struct ir3_instruction *instr)
1036 {
1037 ctx->branch[ctx->branch_count++] = instr;
1038 }
1039
/* Close the innermost open branch: emit a (jp)-flagged nop to serve as
 * the jump target, then patch the saved branch instruction's relative
 * offset to land on it.
 */
static void
pop_branch(struct ir3_compile_context *ctx)
{
	struct ir3_instruction *instr;

	/* if we were clever enough, we'd patch this up after the fact,
	 * and set (jp) flag on whatever the next instruction was, rather
	 * than inserting an extra nop..
	 */
	instr = instr_create(ctx, 0, OPC_NOP);
	instr->flags |= IR3_INSTR_JP;

	/* pop the branch instruction from the stack and fix up branch target: */
	instr = ctx->branch[--ctx->branch_count];
	instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
}
1056
1057 /* We probably don't really want to translate if/else/endif into branches..
1058 * the blob driver evaluates both legs of the if and then uses the sel
1059 * instruction to pick which sides of the branch to "keep".. but figuring
1060 * that out will take somewhat more compiler smarts. So hopefully branches
1061 * don't kill performance too badly.
1062 */
/* IF: compare the condition against 0.0 into p0.x, then emit a
 * conditional branch whose target is patched later by pop_branch().
 */
static void
trans_if(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	struct tgsi_src_register constval;

	get_immediate(ctx, &constval, fui(0.0));

	/* cmps.f can't take two const srcs (constval is const too): */
	if (is_const(src))
		src = get_unconst(ctx, src);

	/* cmps.f.eq p0.x, src, {0.0} */
	instr = instr_create(ctx, 2, OPC_CMPS_F);
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
	add_src_reg(ctx, instr, src, src->SwizzleX);
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
	instr->cat2.condition = IR3_COND_EQ;

	/* br to (not-yet-known) end of the block; fixed up in pop_branch(): */
	instr = instr_create(ctx, 0, OPC_BR);
	push_branch(ctx, instr);
}
1086
/* ELSE: terminate the if-leg with an unconditional jump past the else
 * block, patch the pending IF branch to land here, and push the new
 * jump for patching at ENDIF.
 */
static void
trans_else(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;

	/* for first half of if/else/endif, generate a jump past the else: */
	instr = instr_create(ctx, 0, OPC_JUMP);

	pop_branch(ctx);
	push_branch(ctx, instr);
}
1100
/* ENDIF: patch the pending IF/ELSE branch to land here. */
static void
trans_endif(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	pop_branch(ctx);
}
1108
1109 /*
1110 * Handlers for TGSI instructions which do have 1:1 mapping to native
1111 * instructions:
1112 */
1113
/* Category 0 (flow control / misc) opcodes with no operands,
 * eg. END/KILL: simply emit the native opcode.
 */
static void
instr_cat0(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	instr_create(ctx, 0, t->opc);
}
1121
1122 static void
1123 instr_cat1(const struct instr_translater *t,
1124 struct ir3_compile_context *ctx,
1125 struct tgsi_full_instruction *inst)
1126 {
1127 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1128 struct tgsi_src_register *src = &inst->Src[0].Register;
1129
1130 /* mov instructions can't handle a negate on src: */
1131 if (src->Negate) {
1132 struct tgsi_src_register constval;
1133 struct ir3_instruction *instr;
1134
1135 /* since right now, we are using uniformly either TYPE_F16 or
1136 * TYPE_F32, and we don't utilize the conversion possibilities
1137 * of mov instructions, we can get away with substituting an
1138 * add.f which can handle negate. Might need to revisit this
1139 * in the future if we start supporting widening/narrowing or
1140 * conversion to/from integer..
1141 */
1142 instr = instr_create(ctx, 2, OPC_ADD_F);
1143 get_immediate(ctx, &constval, fui(0.0));
1144 vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
1145 } else {
1146 create_mov(ctx, dst, src);
1147 /* create_mov() generates vector sequence, so no vectorize() */
1148 }
1149 put_dst(ctx, inst, dst);
1150 }
1151
1152 static void
1153 instr_cat2(const struct instr_translater *t,
1154 struct ir3_compile_context *ctx,
1155 struct tgsi_full_instruction *inst)
1156 {
1157 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1158 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1159 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1160 struct ir3_instruction *instr;
1161 unsigned src0_flags = 0, src1_flags = 0;
1162
1163 switch (t->tgsi_opc) {
1164 case TGSI_OPCODE_ABS:
1165 src0_flags = IR3_REG_ABS;
1166 break;
1167 case TGSI_OPCODE_SUB:
1168 src1_flags = IR3_REG_NEGATE;
1169 break;
1170 }
1171
1172 switch (t->opc) {
1173 case OPC_ABSNEG_F:
1174 case OPC_ABSNEG_S:
1175 case OPC_CLZ_B:
1176 case OPC_CLZ_S:
1177 case OPC_SIGN_F:
1178 case OPC_FLOOR_F:
1179 case OPC_CEIL_F:
1180 case OPC_RNDNE_F:
1181 case OPC_RNDAZ_F:
1182 case OPC_TRUNC_F:
1183 case OPC_NOT_B:
1184 case OPC_BFREV_B:
1185 case OPC_SETRM:
1186 case OPC_CBITS_B:
1187 /* these only have one src reg */
1188 instr = instr_create(ctx, 2, t->opc);
1189 vectorize(ctx, instr, dst, 1, src0, src0_flags);
1190 break;
1191 default:
1192 if (is_const(src0) && is_const(src1))
1193 src0 = get_unconst(ctx, src0);
1194
1195 instr = instr_create(ctx, 2, t->opc);
1196 vectorize(ctx, instr, dst, 2, src0, src0_flags,
1197 src1, src1_flags);
1198 break;
1199 }
1200
1201 put_dst(ctx, inst, dst);
1202 }
1203
1204 static void
1205 instr_cat3(const struct instr_translater *t,
1206 struct ir3_compile_context *ctx,
1207 struct tgsi_full_instruction *inst)
1208 {
1209 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1210 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1211 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1212 struct ir3_instruction *instr;
1213
1214 /* in particular, can't handle const for src1 for cat3..
1215 * for mad, we can swap first two src's if needed:
1216 */
1217 if (is_rel_or_const(src1)) {
1218 if (is_mad(t->opc) && !is_rel_or_const(src0)) {
1219 struct tgsi_src_register *tmp;
1220 tmp = src0;
1221 src0 = src1;
1222 src1 = tmp;
1223 } else {
1224 src1 = get_unconst(ctx, src1);
1225 }
1226 }
1227
1228 instr = instr_create(ctx, 3,
1229 ctx->so->key.half_precision ? t->hopc : t->opc);
1230 vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
1231 &inst->Src[2].Register, 0);
1232 put_dst(ctx, inst, dst);
1233 }
1234
1235 static void
1236 instr_cat4(const struct instr_translater *t,
1237 struct ir3_compile_context *ctx,
1238 struct tgsi_full_instruction *inst)
1239 {
1240 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1241 struct tgsi_src_register *src = &inst->Src[0].Register;
1242 struct ir3_instruction *instr;
1243 unsigned i, n;
1244
1245 /* seems like blob compiler avoids const as src.. */
1246 if (is_const(src))
1247 src = get_unconst(ctx, src);
1248
1249 /* worst case: */
1250 add_nop(ctx, 6);
1251
1252 /* we need to replicate into each component: */
1253 for (i = 0, n = 0; i < 4; i++) {
1254 if (dst->WriteMask & (1 << i)) {
1255 if (n++)
1256 add_nop(ctx, 1);
1257 instr = instr_create(ctx, 4, t->opc);
1258 add_dst_reg(ctx, instr, dst, i);
1259 add_src_reg(ctx, instr, src, src->SwizzleX);
1260 }
1261 }
1262
1263 regmask_set(&ctx->needs_ss, instr->regs[0]);
1264 put_dst(ctx, inst, dst);
1265 }
1266
/* Table mapping TGSI opcode -> translation handler, plus the native
 * opcode (.opc/.hopc) and any per-opcode argument.  Unlisted opcodes
 * are zero-initialized; compile_instructions() treats a NULL .fxn as
 * "unsupported" and raises a compile error.
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }

	INSTR(MOV,          instr_cat1),
	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
	/* SUB is ADD with the negate modifier on src1 (see instr_cat2): */
	INSTR(SUB,          instr_cat2, .opc = OPC_ADD_F),
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
	INSTR(CLAMP,        trans_clamp),
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
	INSTR(ROUND,        instr_cat2, .opc = OPC_RNDNE_F),
	INSTR(SSG,          instr_cat2, .opc = OPC_SIGN_F),
	INSTR(ARL,          trans_arl),
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
	/* ABS is absneg.f with the abs modifier on src0 (see instr_cat2): */
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
	INSTR(COS,          instr_cat4, .opc = OPC_COS),
	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
	INSTR(SGT,          trans_cmp),
	INSTR(SLT,          trans_cmp),
	INSTR(SGE,          trans_cmp),
	INSTR(SLE,          trans_cmp),
	INSTR(SNE,          trans_cmp),
	INSTR(SEQ,          trans_cmp),
	INSTR(CMP,          trans_cmp),
	INSTR(IF,           trans_if),
	INSTR(ELSE,         trans_else),
	INSTR(ENDIF,        trans_endif),
	INSTR(END,          instr_cat0, .opc = OPC_END),
	INSTR(KILL,         instr_cat0, .opc = OPC_KILL),
};
1307
1308 static ir3_semantic
1309 decl_semantic(const struct tgsi_declaration_semantic *sem)
1310 {
1311 return ir3_semantic_name(sem->Name, sem->Index);
1312 }
1313
/* Handle a TGSI input declaration: assign input registers and record
 * metadata in the shader variant.  For fragment shaders, also emit the
 * bary.f instructions which interpolate each input component.
 *
 * Returns the number of nop's required before the next ALU instruction
 * (6 after emitting bary.f, else 0) -- consumed in compile_instructions().
 */
static int
decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct ir3_shader_variant *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
	unsigned i, flags = 0;
	int nop = 0;

	/* I don't think we should get frag shader input without
	 * semantic info?  Otherwise how do inputs get linked to
	 * vert outputs?
	 */
	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
			decl->Declaration.Semantic);

	if (ctx->so->key.half_precision)
		flags |= IR3_REG_HALF;

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->inputs_count++;
		unsigned r = regid(i + base, 0);
		unsigned ncomp;

		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
		ncomp = 4;

		DBG("decl in -> r%d", i + base); // XXX

		compile_assert(ctx, n < ARRAY_SIZE(so->inputs));

		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
		so->inputs[n].compmask = (1 << ncomp) - 1;
		so->inputs[n].ncomp = ncomp;
		so->inputs[n].regid = r;
		so->inputs[n].inloc = ctx->next_inloc;
		so->inputs[n].bary = true; /* all that is supported */
		ctx->next_inloc += ncomp;

		so->total_in += ncomp;

		/* for frag shaders, we need to generate the corresponding bary instr: */
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			unsigned j;

			for (j = 0; j < ncomp; j++) {
				struct ir3_instruction *instr;
				struct ir3_register *dst;

				instr = instr_create(ctx, 2, OPC_BARY_F);

				/* dst register: */
				dst = ir3_reg_create(instr, r + j, flags);
				/* remember the last interpolated input, so the (ei)
				 * "end input" flag can be set on it at end of shader:
				 */
				ctx->last_input = dst;

				/* input position: NOTE(review) the -8 bias presumably
				 * matches the hw varying-fetch encoding -- confirm
				 * against the a3xx docs:
				 */
				ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
						so->inputs[n].inloc + j - 8;

				/* input base (always r0.xy): */
				ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3;
			}

			/* bary.f results need delay before first use; 6 is the
			 * worst case padded in before the next ALU instruction:
			 */
			nop = 6;
		}
	}

	return nop;
}
1382
1383 static void
1384 decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
1385 {
1386 struct ir3_shader_variant *so = ctx->so;
1387 unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
1388 unsigned comp = 0;
1389 unsigned name = decl->Semantic.Name;
1390 unsigned i;
1391
1392 compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true?
1393
1394 DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX
1395
1396 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
1397 switch (name) {
1398 case TGSI_SEMANTIC_POSITION:
1399 so->writes_pos = true;
1400 break;
1401 case TGSI_SEMANTIC_PSIZE:
1402 so->writes_psize = true;
1403 break;
1404 case TGSI_SEMANTIC_COLOR:
1405 case TGSI_SEMANTIC_BCOLOR:
1406 case TGSI_SEMANTIC_GENERIC:
1407 case TGSI_SEMANTIC_FOG:
1408 case TGSI_SEMANTIC_TEXCOORD:
1409 break;
1410 default:
1411 compile_error(ctx, "unknown VS semantic name: %s\n",
1412 tgsi_semantic_names[name]);
1413 }
1414 } else {
1415 switch (name) {
1416 case TGSI_SEMANTIC_POSITION:
1417 comp = 2; /* tgsi will write to .z component */
1418 so->writes_pos = true;
1419 break;
1420 case TGSI_SEMANTIC_COLOR:
1421 break;
1422 default:
1423 compile_error(ctx, "unknown FS semantic name: %s\n",
1424 tgsi_semantic_names[name]);
1425 }
1426 }
1427
1428 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
1429 unsigned n = so->outputs_count++;
1430 compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
1431 so->outputs[n].semantic = decl_semantic(&decl->Semantic);
1432 so->outputs[n].regid = regid(i + base, comp);
1433 }
1434 }
1435
/* Sampler declaration: just note that the shader variant uses samplers. */
static void
decl_samp(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	ctx->so->has_samp = true;
}
1441
/* Main TGSI -> ir3 translation loop: walk the token stream, dispatch
 * declarations, immediates and instructions, then apply end-of-shader
 * fixups (sync flags on first instr, (ei) on last input).
 */
static void
compile_instructions(struct ir3_compile_context *ctx)
{
	struct ir3 *ir = ctx->ir;
	/* nop's still owed before the next ALU instr (from decl_in()'s bary.f): */
	int nop = 0;

	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				nop = decl_in(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
				decl_samp(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			/* 16 bytes == one vec4 worth of immediate values: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			/* flush any nop's owed from a preceding input decl: */
			add_nop(ctx, nop);
			nop = 0;

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				/* each TGSI instruction starts with a fresh set of
				 * internal temp registers:
				 */
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* TGSI saturate modifiers are lowered to explicit clamps: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			break;
		}
		default:
			break;
		}
	}

	/* set (ss)/(sy) sync flags on the first instruction: */
	if (ir->instrs_count > 0)
		ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;

	/* mark "end input" on the dst of the last bary.f emitted: */
	if (ctx->last_input)
		ctx->last_input->flags |= IR3_REG_EI;

	handle_last_rel(ctx);
}
1518
1519 int
1520 ir3_compile_shader_old(struct ir3_shader_variant *so,
1521 const struct tgsi_token *tokens, struct ir3_shader_key key)
1522 {
1523 struct ir3_compile_context ctx;
1524
1525 assert(!so->ir);
1526
1527 so->ir = ir3_create();
1528
1529 assert(so->ir);
1530
1531 if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
1532 return -1;
1533
1534 compile_instructions(&ctx);
1535
1536 compile_free(&ctx);
1537
1538 return 0;
1539 }