4267feb351fe76679e872abb279cfe92d9b8444a
[mesa.git] / src / gallium / drivers / freedreno / ir3 / ir3_compiler_old.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include <stdarg.h>
30
31 #include "pipe/p_state.h"
32 #include "util/u_string.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_ureg.h"
37 #include "tgsi/tgsi_info.h"
38 #include "tgsi/tgsi_strings.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_scan.h"
41
42 #include "freedreno_lowering.h"
43 #include "freedreno_util.h"
44
45 #include "ir3_compiler.h"
46 #include "ir3_shader.h"
47
48 #include "instr-a3xx.h"
49 #include "ir3.h"
50
51
/* Per-shader compile state threaded through all the translation helpers
 * below: holds the TGSI parse state, the ir3 program being built, and
 * the bookkeeping needed for register assignment, sync flags and branch
 * target fixups.
 */
struct ir3_compile_context {
    const struct tgsi_token *tokens;
    bool free_tokens;    /* do we own (and must free()) 'tokens'? */
    struct ir3 *ir;
    struct ir3_block *block;
    struct ir3_shader_variant *so;

    struct tgsi_parse_context parser;
    unsigned type;       /* TGSI processor type (vertex/fragment/..) */

    struct tgsi_shader_info info;

    /* last input dst (for setting (ei) flag): */
    struct ir3_register *last_input;

    /* last instruction with relative addressing: */
    struct ir3_instruction *last_rel;

    /* for calculating input/output positions/linkages: */
    unsigned next_inloc;

    unsigned num_internal_temps;
    struct tgsi_src_register internal_temps[6];

    /* track registers which need to synchronize w/ "complex alu" cat3
     * instruction pipeline:
     */
    regmask_t needs_ss;

    /* track registers which need to synchronize with texture fetch
     * pipeline:
     */
    regmask_t needs_sy;

    /* inputs start at r0, outputs start after last input, and
     * temporaries start after last output (see compile_init()).
     *
     * We could be more clever, because this is not a hw restriction,
     * but probably best just to implement an optimizing pass to
     * reduce the # of registers used and get rid of redundant mov's
     * (to output register).
     */
    unsigned base_reg[TGSI_FILE_COUNT];

    /* idx/slot for last compiler generated immediate */
    unsigned immediate_idx;

    /* stack of branch instructions that start (potentially nested)
     * branches, so that we can fix up the branch target on the
     * corresponding ELSE/ENDIF/END instruction
     */
    struct ir3_instruction *branch[16];
    unsigned int branch_count;

    /* used when dst is same as one of the src, to avoid overwriting a
     * src element before the remaining scalar instructions that make
     * up the vector operation
     */
    struct tgsi_dst_register tmp_dst;
    struct tgsi_src_register *tmp_src;
};
114
115
116 static void vectorize(struct ir3_compile_context *ctx,
117 struct ir3_instruction *instr, struct tgsi_dst_register *dst,
118 int nsrcs, ...);
119 static void create_mov(struct ir3_compile_context *ctx,
120 struct tgsi_dst_register *dst, struct tgsi_src_register *src);
121
/* Set up compile state for one shader variant: run the TGSI lowering
 * pass, carve up the register space between the TGSI register files,
 * and initialize the token parser.  Returns TGSI_PARSE_OK on success.
 */
static unsigned
compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
        const struct tgsi_token *tokens)
{
    unsigned ret, base = 0;
    struct tgsi_shader_info *info = &ctx->info;
    /* opcodes this backend does not implement natively get lowered
     * to simpler TGSI up front:
     */
    struct fd_lowering_config lconfig = {
            .color_two_side = so->key.color_two_side,
            .lower_DST = true,
            .lower_XPD = true,
            .lower_SCS = true,
            .lower_LRP = true,
            .lower_FRC = true,
            .lower_POW = true,
            .lower_LIT = true,
            .lower_EXP = true,
            .lower_LOG = true,
            .lower_DP4 = true,
            .lower_DP3 = true,
            .lower_DPH = true,
            .lower_DP2 = true,
            .lower_DP2A = true,
    };

    /* saturate-coordinate lowering flags come from the variant key,
     * and differ per shader stage:
     */
    switch (so->type) {
    case SHADER_FRAGMENT:
    case SHADER_COMPUTE:
        lconfig.saturate_s = so->key.fsaturate_s;
        lconfig.saturate_t = so->key.fsaturate_t;
        lconfig.saturate_r = so->key.fsaturate_r;
        break;
    case SHADER_VERTEX:
        lconfig.saturate_s = so->key.vsaturate_s;
        lconfig.saturate_t = so->key.vsaturate_t;
        lconfig.saturate_r = so->key.vsaturate_r;
        break;
    }

    /* fd_transform_lowering() returns NULL when no lowering was
     * needed, in which case we keep (and do not own) the caller's
     * token stream:
     */
    ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
    ctx->free_tokens = !!ctx->tokens;
    if (!ctx->tokens) {
        /* no lowering */
        ctx->tokens = tokens;
    }
    ctx->ir = so->ir;
    ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
    ctx->so = so;
    ctx->last_input = NULL;
    ctx->last_rel = NULL;
    ctx->next_inloc = 8;
    ctx->num_internal_temps = 0;
    ctx->branch_count = 0;

    regmask_init(&ctx->needs_ss);
    regmask_init(&ctx->needs_sy);
    memset(ctx->base_reg, 0, sizeof(ctx->base_reg));

    /* Immediates go after constants: */
    ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
    ctx->base_reg[TGSI_FILE_IMMEDIATE] =
            info->file_max[TGSI_FILE_CONSTANT] + 1;

    /* if full precision and fragment shader, don't clobber
     * r0.x w/ bary fetch:
     */
    if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision)
        base = 1;

    /* Temporaries after outputs after inputs: */
    ctx->base_reg[TGSI_FILE_INPUT]  = base;
    ctx->base_reg[TGSI_FILE_OUTPUT] = base +
            info->file_max[TGSI_FILE_INPUT] + 1;
    ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
            info->file_max[TGSI_FILE_INPUT] + 1 +
            info->file_max[TGSI_FILE_OUTPUT] + 1;

    so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
    /* compiler-generated immediates are appended after the shader's
     * own immediates (4 scalar slots per vec4):
     */
    ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);

    ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
    if (ret != TGSI_PARSE_OK)
        return ret;

    ctx->type = ctx->parser.FullHeader.Processor.Processor;

    return ret;
}
209
210 static void
211 compile_error(struct ir3_compile_context *ctx, const char *format, ...)
212 {
213 va_list ap;
214 va_start(ap, format);
215 _debug_vprintf(format, ap);
216 va_end(ap);
217 tgsi_dump(ctx->tokens, 0);
218 debug_assert(0);
219 }
220
/* abort the compile (with message + TGSI dump) if 'cond' does not hold: */
#define compile_assert(ctx, cond) do { \
        if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
    } while (0)
224
225 static void
226 compile_free(struct ir3_compile_context *ctx)
227 {
228 if (ctx->free_tokens)
229 free((void *)ctx->tokens);
230 tgsi_parse_free(&ctx->parser);
231 }
232
/* Table entry mapping one TGSI opcode to its translation handler plus
 * the native opcode(s)/argument the handler should emit.
 */
struct instr_translater {
    /* handler that emits the ir3 instruction sequence for 'inst': */
    void (*fxn)(const struct instr_translater *t,
            struct ir3_compile_context *ctx,
            struct tgsi_full_instruction *inst);
    unsigned tgsi_opc;   /* TGSI opcode this entry handles */
    opc_t opc;           /* native opcode to emit */
    opc_t hopc;          /* opc to use for half_precision mode, if different */
    unsigned arg;        /* extra handler-specific argument (eg. TGSI opcode) */
};
242
243 static void
244 handle_last_rel(struct ir3_compile_context *ctx)
245 {
246 if (ctx->last_rel) {
247 ctx->last_rel->flags |= IR3_INSTR_UL;
248 ctx->last_rel = NULL;
249 }
250 }
251
/* convenience wrapper: append a new instruction to the current block */
static struct ir3_instruction *
instr_create(struct ir3_compile_context *ctx, int category, opc_t opc)
{
    return ir3_instr_create(ctx->block, category, opc);
}
257
258 static void
259 add_nop(struct ir3_compile_context *ctx, unsigned count)
260 {
261 while (count-- > 0)
262 instr_create(ctx, 0, OPC_NOP);
263 }
264
265 static unsigned
266 src_flags(struct ir3_compile_context *ctx, struct ir3_register *reg)
267 {
268 unsigned flags = 0;
269
270 if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
271 return flags;
272
273 if (regmask_get(&ctx->needs_ss, reg)) {
274 flags |= IR3_INSTR_SS;
275 regmask_init(&ctx->needs_ss);
276 }
277
278 if (regmask_get(&ctx->needs_sy, reg)) {
279 flags |= IR3_INSTR_SY;
280 regmask_init(&ctx->needs_sy);
281 }
282
283 return flags;
284 }
285
286 static struct ir3_register *
287 add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
288 const struct tgsi_dst_register *dst, unsigned chan)
289 {
290 unsigned flags = 0, num = 0;
291 struct ir3_register *reg;
292
293 switch (dst->File) {
294 case TGSI_FILE_OUTPUT:
295 case TGSI_FILE_TEMPORARY:
296 num = dst->Index + ctx->base_reg[dst->File];
297 break;
298 case TGSI_FILE_ADDRESS:
299 num = REG_A0;
300 break;
301 default:
302 compile_error(ctx, "unsupported dst register file: %s\n",
303 tgsi_file_name(dst->File));
304 break;
305 }
306
307 if (dst->Indirect)
308 flags |= IR3_REG_RELATIV;
309 if (ctx->so->key.half_precision)
310 flags |= IR3_REG_HALF;
311
312 reg = ir3_reg_create(instr, regid(num, chan), flags);
313
314 if (dst->Indirect)
315 ctx->last_rel = instr;
316
317 return reg;
318 }
319
320 static struct ir3_register *
321 add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
322 const struct tgsi_src_register *src, unsigned chan)
323 {
324 unsigned flags = 0, num = 0;
325 struct ir3_register *reg;
326
327 /* TODO we need to use a mov to temp for const >= 64.. or maybe
328 * we could use relative addressing..
329 */
330 compile_assert(ctx, src->Index < 64);
331
332 switch (src->File) {
333 case TGSI_FILE_IMMEDIATE:
334 /* TODO if possible, use actual immediate instead of const.. but
335 * TGSI has vec4 immediates, we can only embed scalar (of limited
336 * size, depending on instruction..)
337 */
338 case TGSI_FILE_CONSTANT:
339 flags |= IR3_REG_CONST;
340 num = src->Index + ctx->base_reg[src->File];
341 break;
342 case TGSI_FILE_OUTPUT:
343 /* NOTE: we should only end up w/ OUTPUT file for things like
344 * clamp()'ing saturated dst instructions
345 */
346 case TGSI_FILE_INPUT:
347 case TGSI_FILE_TEMPORARY:
348 num = src->Index + ctx->base_reg[src->File];
349 break;
350 default:
351 compile_error(ctx, "unsupported src register file: %s\n",
352 tgsi_file_name(src->File));
353 break;
354 }
355
356 if (src->Absolute)
357 flags |= IR3_REG_ABS;
358 if (src->Negate)
359 flags |= IR3_REG_NEGATE;
360 if (src->Indirect)
361 flags |= IR3_REG_RELATIV;
362 if (ctx->so->key.half_precision)
363 flags |= IR3_REG_HALF;
364
365 reg = ir3_reg_create(instr, regid(num, chan), flags);
366
367 if (src->Indirect)
368 ctx->last_rel = instr;
369
370 instr->flags |= src_flags(ctx, reg);
371
372 return reg;
373 }
374
375 static void
376 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
377 {
378 src->File = dst->File;
379 src->Indirect = dst->Indirect;
380 src->Dimension = dst->Dimension;
381 src->Index = dst->Index;
382 src->Absolute = 0;
383 src->Negate = 0;
384 src->SwizzleX = TGSI_SWIZZLE_X;
385 src->SwizzleY = TGSI_SWIZZLE_Y;
386 src->SwizzleZ = TGSI_SWIZZLE_Z;
387 src->SwizzleW = TGSI_SWIZZLE_W;
388 }
389
390 /* Get internal-temp src/dst to use for a sequence of instructions
391 * generated by a single TGSI op.
392 */
393 static struct tgsi_src_register *
394 get_internal_temp(struct ir3_compile_context *ctx,
395 struct tgsi_dst_register *tmp_dst)
396 {
397 struct tgsi_src_register *tmp_src;
398 int n;
399
400 tmp_dst->File = TGSI_FILE_TEMPORARY;
401 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
402 tmp_dst->Indirect = 0;
403 tmp_dst->Dimension = 0;
404
405 /* assign next temporary: */
406 n = ctx->num_internal_temps++;
407 compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
408 tmp_src = &ctx->internal_temps[n];
409
410 tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
411
412 src_from_dst(tmp_src, tmp_dst);
413
414 return tmp_src;
415 }
416
417 /* Get internal half-precision temp src/dst to use for a sequence of
418 * instructions generated by a single TGSI op.
419 */
420 static struct tgsi_src_register *
421 get_internal_temp_hr(struct ir3_compile_context *ctx,
422 struct tgsi_dst_register *tmp_dst)
423 {
424 struct tgsi_src_register *tmp_src;
425 int n;
426
427 if (ctx->so->key.half_precision)
428 return get_internal_temp(ctx, tmp_dst);
429
430 tmp_dst->File = TGSI_FILE_TEMPORARY;
431 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
432 tmp_dst->Indirect = 0;
433 tmp_dst->Dimension = 0;
434
435 /* assign next temporary: */
436 n = ctx->num_internal_temps++;
437 compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
438 tmp_src = &ctx->internal_temps[n];
439
440 /* just use hr0 because no one else should be using half-
441 * precision regs:
442 */
443 tmp_dst->Index = 0;
444
445 src_from_dst(tmp_src, tmp_dst);
446
447 return tmp_src;
448 }
449
450 static inline bool
451 is_const(struct tgsi_src_register *src)
452 {
453 return (src->File == TGSI_FILE_CONSTANT) ||
454 (src->File == TGSI_FILE_IMMEDIATE);
455 }
456
457 static inline bool
458 is_relative(struct tgsi_src_register *src)
459 {
460 return src->Indirect;
461 }
462
/* src needing special handling: relative addressing or const file */
static inline bool
is_rel_or_const(struct tgsi_src_register *src)
{
    if (is_relative(src))
        return true;
    return is_const(src);
}
468
469 static type_t
470 get_ftype(struct ir3_compile_context *ctx)
471 {
472 return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32;
473 }
474
475 static type_t
476 get_utype(struct ir3_compile_context *ctx)
477 {
478 return ctx->so->key.half_precision ? TYPE_U16 : TYPE_U32;
479 }
480
481 static unsigned
482 src_swiz(struct tgsi_src_register *src, int chan)
483 {
484 switch (chan) {
485 case 0: return src->SwizzleX;
486 case 1: return src->SwizzleY;
487 case 2: return src->SwizzleZ;
488 case 3: return src->SwizzleW;
489 }
490 assert(0);
491 return 0;
492 }
493
494 /* for instructions that cannot take a const register as src, if needed
495 * generate a move to temporary gpr:
496 */
497 static struct tgsi_src_register *
498 get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src)
499 {
500 struct tgsi_dst_register tmp_dst;
501 struct tgsi_src_register *tmp_src;
502
503 compile_assert(ctx, is_rel_or_const(src));
504
505 tmp_src = get_internal_temp(ctx, &tmp_dst);
506
507 create_mov(ctx, &tmp_dst, src);
508
509 return tmp_src;
510 }
511
/* Fill in 'reg' to reference the scalar immediate 'val' (raw 32-bit
 * pattern, eg. from fui()).  Reuses an already-emitted immediate slot
 * when possible (optionally via the negate flag), otherwise appends a
 * new one to the shader's immediate table.
 */
static void
get_immediate(struct ir3_compile_context *ctx,
        struct tgsi_src_register *reg, uint32_t val)
{
    unsigned neg, swiz, idx, i;
    /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
    static const unsigned swiz2tgsi[] = {
            TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
    };

    /* scan existing immediates for an exact (or negated) match: */
    for (i = 0; i < ctx->immediate_idx; i++) {
        swiz = i % 4;
        idx  = i / 4;

        if (ctx->so->immediates[idx].val[swiz] == val) {
            neg = 0;
            break;
        }

        /* NOTE(review): this matches the two's-complement negation of
         * 'val'; for float immediates negation is a sign-bit flip
         * (val ^ 0x80000000), not integer negation — TODO confirm the
         * intended IR3_REG_NEGATE semantics for float consts.
         */
        if (ctx->so->immediates[idx].val[swiz] == -val) {
            neg = 1;
            break;
        }
    }

    if (i == ctx->immediate_idx) {
        /* need to generate a new immediate: */
        swiz = i % 4;
        idx  = i / 4;
        neg  = 0;
        ctx->so->immediates[idx].val[swiz] = val;
        ctx->so->immediates_count = idx + 1;
        ctx->immediate_idx++;
    }

    reg->File      = TGSI_FILE_IMMEDIATE;
    reg->Indirect  = 0;
    reg->Dimension = 0;
    reg->Index     = idx;
    reg->Absolute  = 0;
    reg->Negate    = neg;
    /* broadcast the matched component across all channels: */
    reg->SwizzleX  = swiz2tgsi[swiz];
    reg->SwizzleY  = swiz2tgsi[swiz];
    reg->SwizzleZ  = swiz2tgsi[swiz];
    reg->SwizzleW  = swiz2tgsi[swiz];
}
558
559 static void
560 create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst,
561 struct tgsi_src_register *src)
562 {
563 type_t type_mov = get_ftype(ctx);
564 unsigned i;
565
566 for (i = 0; i < 4; i++) {
567 /* move to destination: */
568 if (dst->WriteMask & (1 << i)) {
569 struct ir3_instruction *instr;
570
571 if (src->Absolute || src->Negate) {
572 /* can't have abs or neg on a mov instr, so use
573 * absneg.f instead to handle these cases:
574 */
575 instr = instr_create(ctx, 2, OPC_ABSNEG_F);
576 } else {
577 instr = instr_create(ctx, 1, 0);
578 instr->cat1.src_type = type_mov;
579 instr->cat1.dst_type = type_mov;
580 }
581
582 add_dst_reg(ctx, instr, dst, i);
583 add_src_reg(ctx, instr, src, src_swiz(src, i));
584 } else {
585 add_nop(ctx, 1);
586 }
587 }
588 }
589
590 static void
591 create_clamp(struct ir3_compile_context *ctx,
592 struct tgsi_dst_register *dst, struct tgsi_src_register *val,
593 struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
594 {
595 struct ir3_instruction *instr;
596
597 instr = instr_create(ctx, 2, OPC_MAX_F);
598 vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
599
600 instr = instr_create(ctx, 2, OPC_MIN_F);
601 vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
602 }
603
604 static void
605 create_clamp_imm(struct ir3_compile_context *ctx,
606 struct tgsi_dst_register *dst,
607 uint32_t minval, uint32_t maxval)
608 {
609 struct tgsi_src_register minconst, maxconst;
610 struct tgsi_src_register src;
611
612 src_from_dst(&src, dst);
613
614 get_immediate(ctx, &minconst, minval);
615 get_immediate(ctx, &maxconst, maxval);
616
617 create_clamp(ctx, dst, &src, &minconst, &maxconst);
618 }
619
620 static struct tgsi_dst_register *
621 get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst)
622 {
623 struct tgsi_dst_register *dst = &inst->Dst[0].Register;
624 unsigned i;
625 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
626 struct tgsi_src_register *src = &inst->Src[i].Register;
627 if ((src->File == dst->File) && (src->Index == dst->Index)) {
628 if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
629 (src->SwizzleX == TGSI_SWIZZLE_X) &&
630 (src->SwizzleY == TGSI_SWIZZLE_Y) &&
631 (src->SwizzleZ == TGSI_SWIZZLE_Z) &&
632 (src->SwizzleW == TGSI_SWIZZLE_W))
633 continue;
634 ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
635 ctx->tmp_dst.WriteMask = dst->WriteMask;
636 dst = &ctx->tmp_dst;
637 break;
638 }
639 }
640 return dst;
641 }
642
643 static void
644 put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst,
645 struct tgsi_dst_register *dst)
646 {
647 /* if necessary, add mov back into original dst: */
648 if (dst != &inst->Dst[0].Register) {
649 create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
650 }
651 }
652
/* helper to generate the necessary repeat and/or additional instructions
 * to turn a scalar instruction into a vector operation:
 *
 * 'instr' is the already-created first instruction; the varargs are
 * 'nsrcs' pairs of (struct tgsi_src_register *src, unsigned flags).
 * If flags contains IR3_REG_IMMED, the "src" pointer slot actually
 * carries an integer immediate value instead of a register.
 *
 * One instruction is emitted per enabled channel of dst->WriteMask
 * (the first reuses 'instr', the rest are clones with per-channel
 * src/dst components patched in), then nop-padded to 4 slots.
 */
static void
vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
        struct tgsi_dst_register *dst, int nsrcs, ...)
{
    va_list ap;
    int i, j, n = 0;
    bool indirect = dst->Indirect;

    /* first pass: build the prototype instruction on channel X: */
    add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);

    va_start(ap, nsrcs);
    for (j = 0; j < nsrcs; j++) {
        struct tgsi_src_register *src =
                va_arg(ap, struct tgsi_src_register *);
        unsigned flags = va_arg(ap, unsigned);
        struct ir3_register *reg;
        if (flags & IR3_REG_IMMED) {
            reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
            /* this is an ugly cast.. should have put flags first! */
            reg->iim_val = *(int *)&src;
        } else {
            reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
            indirect |= src->Indirect;
        }
        /* apply caller-requested flags; NEGATE toggles (xor) so a
         * src that is already negated gets un-negated:
         */
        reg->flags |= flags & ~IR3_REG_NEGATE;
        if (flags & IR3_REG_NEGATE)
            reg->flags ^= IR3_REG_NEGATE;
    }
    va_end(ap);

    /* second pass: one instruction per enabled write channel: */
    for (i = 0; i < 4; i++) {
        if (dst->WriteMask & (1 << i)) {
            struct ir3_instruction *cur;

            if (n++ == 0) {
                cur = instr;
            } else {
                /* clones must not inherit one-shot sync/jump flags: */
                cur = ir3_instr_clone(instr);
                cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
            }

            /* fix-up dst register component: */
            cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);

            /* fix-up src register component: */
            va_start(ap, nsrcs);
            for (j = 0; j < nsrcs; j++) {
                struct tgsi_src_register *src =
                        va_arg(ap, struct tgsi_src_register *);
                unsigned flags = va_arg(ap, unsigned);
                if (!(flags & IR3_REG_IMMED)) {
                    cur->regs[j+1]->num =
                            regid(cur->regs[j+1]->num >> 2,
                                    src_swiz(src, i));
                    cur->flags |= src_flags(ctx, cur->regs[j+1]);
                }
            }
            va_end(ap);

            if (indirect)
                ctx->last_rel = cur;
        }
    }

    /* pad w/ nop's.. at least until we are clever enough to
     * figure out if we really need to..
     */
    add_nop(ctx, 4 - n);
}
725
726 /*
727 * Handlers for TGSI instructions which do not have a 1:1 mapping to
728 * native instructions:
729 */
730
731 static void
732 trans_clamp(const struct instr_translater *t,
733 struct ir3_compile_context *ctx,
734 struct tgsi_full_instruction *inst)
735 {
736 struct tgsi_dst_register *dst = get_dst(ctx, inst);
737 struct tgsi_src_register *src0 = &inst->Src[0].Register;
738 struct tgsi_src_register *src1 = &inst->Src[1].Register;
739 struct tgsi_src_register *src2 = &inst->Src[2].Register;
740
741 create_clamp(ctx, dst, src0, src1, src2);
742
743 put_dst(ctx, inst, dst);
744 }
745
/* ARL(x) = x, but mova from hrN.x to a0..
 *
 * Sequence: convert the float src to s16 in a half-reg temp, shift
 * left by 2 (address reg indexes scalar components, TGSI indexes
 * vec4s), then mova into a0.  Explicit nops maintain the required
 * instruction-slot spacing around the address register write.
 */
static void
trans_arl(const struct instr_translater *t,
        struct ir3_compile_context *ctx,
        struct tgsi_full_instruction *inst)
{
    struct ir3_instruction *instr;
    struct tgsi_dst_register tmp_dst;
    struct tgsi_src_register *tmp_src;
    struct tgsi_dst_register *dst = &inst->Dst[0].Register;
    struct tgsi_src_register *src = &inst->Src[0].Register;
    unsigned chan = src->SwizzleX;
    compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);

    /* a0 is about to be rewritten; close out any pending (ul): */
    handle_last_rel(ctx);

    tmp_src = get_internal_temp_hr(ctx, &tmp_dst);

    /* cov.{f32,f16}s16 Rtmp, Rsrc */
    instr = instr_create(ctx, 1, 0);
    instr->cat1.src_type = get_ftype(ctx);
    instr->cat1.dst_type = TYPE_S16;
    add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
    add_src_reg(ctx, instr, src, chan);

    add_nop(ctx, 3);

    /* shl.b Rtmp, Rtmp, 2 */
    instr = instr_create(ctx, 2, OPC_SHL_B);
    add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
    add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
    ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;

    add_nop(ctx, 3);

    /* mova a0, Rtmp */
    instr = instr_create(ctx, 1, 0);
    instr->cat1.src_type = TYPE_S16;
    instr->cat1.dst_type = TYPE_S16;
    add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
    add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;

    /* need to ensure 5 instr slots before a0 is used: */
    add_nop(ctx, 6);
}
791
792 /* texture fetch/sample instructions: */
793 static void
794 trans_samp(const struct instr_translater *t,
795 struct ir3_compile_context *ctx,
796 struct tgsi_full_instruction *inst)
797 {
798 struct ir3_register *r;
799 struct ir3_instruction *instr;
800 struct tgsi_src_register *coord = &inst->Src[0].Register;
801 struct tgsi_src_register *samp = &inst->Src[1].Register;
802 unsigned tex = inst->Texture.Texture;
803 int8_t *order;
804 unsigned i, flags = 0, src_wrmask;
805 bool needs_mov = false;
806
807 switch (t->arg) {
808 case TGSI_OPCODE_TEX:
809 if (tex == TGSI_TEXTURE_2D) {
810 order = (int8_t[4]){ 0, 1, -1, -1 };
811 src_wrmask = TGSI_WRITEMASK_XY;
812 } else {
813 order = (int8_t[4]){ 0, 1, 2, -1 };
814 src_wrmask = TGSI_WRITEMASK_XYZ;
815 }
816 break;
817 case TGSI_OPCODE_TXP:
818 if (tex == TGSI_TEXTURE_2D) {
819 order = (int8_t[4]){ 0, 1, 3, -1 };
820 src_wrmask = TGSI_WRITEMASK_XYZ;
821 } else {
822 order = (int8_t[4]){ 0, 1, 2, 3 };
823 src_wrmask = TGSI_WRITEMASK_XYZW;
824 }
825 flags |= IR3_INSTR_P;
826 break;
827 default:
828 compile_assert(ctx, 0);
829 break;
830 }
831
832 if ((tex == TGSI_TEXTURE_3D) || (tex == TGSI_TEXTURE_CUBE)) {
833 add_nop(ctx, 3);
834 flags |= IR3_INSTR_3D;
835 }
836
837 /* cat5 instruction cannot seem to handle const or relative: */
838 if (is_rel_or_const(coord))
839 needs_mov = true;
840
841 /* The texture sample instructions need to coord in successive
842 * registers/components (ie. src.xy but not src.yx). And TXP
843 * needs the .w component in .z for 2D.. so in some cases we
844 * might need to emit some mov instructions to shuffle things
845 * around:
846 */
847 for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
848 if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
849 needs_mov = true;
850
851 if (needs_mov) {
852 struct tgsi_dst_register tmp_dst;
853 struct tgsi_src_register *tmp_src;
854 unsigned j;
855
856 type_t type_mov = get_ftype(ctx);
857
858 /* need to move things around: */
859 tmp_src = get_internal_temp(ctx, &tmp_dst);
860
861 for (j = 0; (j < 4) && (order[j] >= 0); j++) {
862 instr = instr_create(ctx, 1, 0);
863 instr->cat1.src_type = type_mov;
864 instr->cat1.dst_type = type_mov;
865 add_dst_reg(ctx, instr, &tmp_dst, j);
866 add_src_reg(ctx, instr, coord,
867 src_swiz(coord, order[j]));
868 }
869
870 coord = tmp_src;
871
872 add_nop(ctx, 4 - j);
873 }
874
875 instr = instr_create(ctx, 5, t->opc);
876 instr->cat5.type = get_ftype(ctx);
877 instr->cat5.samp = samp->Index;
878 instr->cat5.tex = samp->Index;
879 instr->flags |= flags;
880
881 r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
882 r->wrmask = inst->Dst[0].Register.WriteMask;
883
884 add_src_reg(ctx, instr, coord, coord->SwizzleX)->wrmask = src_wrmask;
885
886 /* after add_src_reg() so we don't set (sy) on sam instr itself! */
887 regmask_set(&ctx->needs_sy, r);
888 }
889
/*
 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
 *   cmps.f.eq tmp0, b, a
 *   cov.u16f16 dst, tmp0
 *
 * SNE(a,b) = (a != b) ? 1.0 : 0.0
 *   cmps.f.eq tmp0, b, a
 *   add.s tmp0, tmp0, -1
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
 *
 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
 *   cmps.f.ge tmp0, a, b
 *   cov.u16f16 dst, tmp0
 *
 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
 *   cmps.f.ge tmp0, b, a
 *   cov.u16f16 dst, tmp0
 *
 * SGT(a,b) = (a > b) ? 1.0 : 0.0
 *   cmps.f.ge tmp0, b, a
 *   add.s tmp0, tmp0, -1
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
 *
 * SLT(a,b) = (a < b) ? 1.0 : 0.0
 *   cmps.f.ge tmp0, a, b
 *   add.s tmp0, tmp0, -1
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
 *
 * CMP(a,b,c) = (a < 0.0) ? b : c
 *   cmps.f.ge tmp0, a, {0.0}
 *   add.s tmp0, tmp0, -1
 *   sel.f16 dst, c, tmp0, b
 */
static void
trans_cmp(const struct instr_translater *t,
        struct ir3_compile_context *ctx,
        struct tgsi_full_instruction *inst)
{
    struct ir3_instruction *instr;
    struct tgsi_dst_register tmp_dst;
    struct tgsi_src_register *tmp_src;
    struct tgsi_src_register constval0, constval1;
    /* final instruction for CMP() uses orig src1 and src2: */
    struct tgsi_dst_register *dst = get_dst(ctx, inst);
    struct tgsi_src_register *a0, *a1;
    unsigned condition;

    tmp_src = get_internal_temp(ctx, &tmp_dst);

    /* pick the cmps.f operand order + condition per the table in the
     * comment above (note some opcodes swap a/b and reuse the same
     * condition, then invert via the add.s/sel step below):
     */
    switch (t->tgsi_opc) {
    case TGSI_OPCODE_SEQ:
    case TGSI_OPCODE_SNE:
        a0 = &inst->Src[1].Register;  /* b */
        a1 = &inst->Src[0].Register;  /* a */
        condition = IR3_COND_EQ;
        break;
    case TGSI_OPCODE_SGE:
    case TGSI_OPCODE_SLT:
        a0 = &inst->Src[0].Register;  /* a */
        a1 = &inst->Src[1].Register;  /* b */
        condition = IR3_COND_GE;
        break;
    case TGSI_OPCODE_SLE:
    case TGSI_OPCODE_SGT:
        a0 = &inst->Src[1].Register;  /* b */
        a1 = &inst->Src[0].Register;  /* a */
        condition = IR3_COND_GE;
        break;
    case TGSI_OPCODE_CMP:
        get_immediate(ctx, &constval0, fui(0.0));
        a0 = &inst->Src[0].Register;  /* a */
        a1 = &constval0;              /* {0.0} */
        condition = IR3_COND_GE;
        break;
    default:
        compile_assert(ctx, 0);
        return;
    }

    /* cmps cannot take two const operands; move one to a gpr: */
    if (is_const(a0) && is_const(a1))
        a0 = get_unconst(ctx, a0);

    /* cmps.f.ge tmp, a0, a1 */
    instr = instr_create(ctx, 2, OPC_CMPS_F);
    instr->cat2.condition = condition;
    vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);

    switch (t->tgsi_opc) {
    case TGSI_OPCODE_SEQ:
    case TGSI_OPCODE_SGE:
    case TGSI_OPCODE_SLE:
        /* cov.u16f16 dst, tmp0 */
        instr = instr_create(ctx, 1, 0);
        instr->cat1.src_type = get_utype(ctx);
        instr->cat1.dst_type = get_ftype(ctx);
        vectorize(ctx, instr, dst, 1, tmp_src, 0);
        break;
    case TGSI_OPCODE_SNE:
    case TGSI_OPCODE_SGT:
    case TGSI_OPCODE_SLT:
    case TGSI_OPCODE_CMP:
        /* add.s tmp, tmp, -1  (turn 1 -> 0, 0 -> -1, to drive sel) */
        instr = instr_create(ctx, 2, OPC_ADD_S);
        vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);

        if (t->tgsi_opc == TGSI_OPCODE_CMP) {
            /* sel.{f32,f16} dst, src2, tmp, src1 */
            instr = instr_create(ctx, 3,
                    ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
            vectorize(ctx, instr, dst, 3,
                    &inst->Src[2].Register, 0,
                    tmp_src, 0,
                    &inst->Src[1].Register, 0);
        } else {
            get_immediate(ctx, &constval0, fui(0.0));
            get_immediate(ctx, &constval1, fui(1.0));
            /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
            instr = instr_create(ctx, 3,
                    ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
            vectorize(ctx, instr, dst, 3,
                    &constval0, 0, tmp_src, 0, &constval1, 0);
        }

        break;
    }

    put_dst(ctx, inst, dst);
}
1018
1019 /*
1020 * Conditional / Flow control
1021 */
1022
1023 static unsigned
1024 find_instruction(struct ir3_compile_context *ctx, struct ir3_instruction *instr)
1025 {
1026 unsigned i;
1027 for (i = 0; i < ctx->ir->instrs_count; i++)
1028 if (ctx->ir->instrs[i] == instr)
1029 return i;
1030 return ~0;
1031 }
1032
1033 static void
1034 push_branch(struct ir3_compile_context *ctx, struct ir3_instruction *instr)
1035 {
1036 ctx->branch[ctx->branch_count++] = instr;
1037 }
1038
1039 static void
1040 pop_branch(struct ir3_compile_context *ctx)
1041 {
1042 struct ir3_instruction *instr;
1043
1044 /* if we were clever enough, we'd patch this up after the fact,
1045 * and set (jp) flag on whatever the next instruction was, rather
1046 * than inserting an extra nop..
1047 */
1048 instr = instr_create(ctx, 0, OPC_NOP);
1049 instr->flags |= IR3_INSTR_JP;
1050
1051 /* pop the branch instruction from the stack and fix up branch target: */
1052 instr = ctx->branch[--ctx->branch_count];
1053 instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
1054 }
1055
1056 /* We probably don't really want to translate if/else/endif into branches..
1057 * the blob driver evaluates both legs of the if and then uses the sel
1058 * instruction to pick which sides of the branch to "keep".. but figuring
1059 * that out will take somewhat more compiler smarts. So hopefully branches
1060 * don't kill performance too badly.
1061 */
1062 static void
1063 trans_if(const struct instr_translater *t,
1064 struct ir3_compile_context *ctx,
1065 struct tgsi_full_instruction *inst)
1066 {
1067 struct ir3_instruction *instr;
1068 struct tgsi_src_register *src = &inst->Src[0].Register;
1069 struct tgsi_src_register constval;
1070
1071 get_immediate(ctx, &constval, fui(0.0));
1072
1073 if (is_const(src))
1074 src = get_unconst(ctx, src);
1075
1076 instr = instr_create(ctx, 2, OPC_CMPS_F);
1077 ir3_reg_create(instr, regid(REG_P0, 0), 0);
1078 add_src_reg(ctx, instr, src, src->SwizzleX);
1079 add_src_reg(ctx, instr, &constval, constval.SwizzleX);
1080 instr->cat2.condition = IR3_COND_EQ;
1081
1082 instr = instr_create(ctx, 0, OPC_BR);
1083 push_branch(ctx, instr);
1084 }
1085
1086 static void
1087 trans_else(const struct instr_translater *t,
1088 struct ir3_compile_context *ctx,
1089 struct tgsi_full_instruction *inst)
1090 {
1091 struct ir3_instruction *instr;
1092
1093 /* for first half of if/else/endif, generate a jump past the else: */
1094 instr = instr_create(ctx, 0, OPC_JUMP);
1095
1096 pop_branch(ctx);
1097 push_branch(ctx, instr);
1098 }
1099
/* ENDIF: resolve the pending IF (or ELSE) branch to land here */
static void
trans_endif(const struct instr_translater *t,
        struct ir3_compile_context *ctx,
        struct tgsi_full_instruction *inst)
{
    pop_branch(ctx);
}
1107
1108 /*
1109 * Handlers for TGSI instructions which do have 1:1 mapping to native
1110 * instructions:
1111 */
1112
/* Translate a TGSI instruction that maps 1:1 to a single cat0 (flow
 * control) instruction with no operands, eg. END/KILL.
 */
static void
instr_cat0(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	instr_create(ctx, 0, t->opc);
}
1120
1121 static void
1122 instr_cat1(const struct instr_translater *t,
1123 struct ir3_compile_context *ctx,
1124 struct tgsi_full_instruction *inst)
1125 {
1126 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1127 struct tgsi_src_register *src = &inst->Src[0].Register;
1128
1129 /* mov instructions can't handle a negate on src: */
1130 if (src->Negate) {
1131 struct tgsi_src_register constval;
1132 struct ir3_instruction *instr;
1133
1134 /* since right now, we are using uniformly either TYPE_F16 or
1135 * TYPE_F32, and we don't utilize the conversion possibilities
1136 * of mov instructions, we can get away with substituting an
1137 * add.f which can handle negate. Might need to revisit this
1138 * in the future if we start supporting widening/narrowing or
1139 * conversion to/from integer..
1140 */
1141 instr = instr_create(ctx, 2, OPC_ADD_F);
1142 get_immediate(ctx, &constval, fui(0.0));
1143 vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
1144 } else {
1145 create_mov(ctx, dst, src);
1146 /* create_mov() generates vector sequence, so no vectorize() */
1147 }
1148 put_dst(ctx, inst, dst);
1149 }
1150
1151 static void
1152 instr_cat2(const struct instr_translater *t,
1153 struct ir3_compile_context *ctx,
1154 struct tgsi_full_instruction *inst)
1155 {
1156 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1157 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1158 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1159 struct ir3_instruction *instr;
1160 unsigned src0_flags = 0, src1_flags = 0;
1161
1162 switch (t->tgsi_opc) {
1163 case TGSI_OPCODE_ABS:
1164 src0_flags = IR3_REG_ABS;
1165 break;
1166 case TGSI_OPCODE_SUB:
1167 src1_flags = IR3_REG_NEGATE;
1168 break;
1169 }
1170
1171 switch (t->opc) {
1172 case OPC_ABSNEG_F:
1173 case OPC_ABSNEG_S:
1174 case OPC_CLZ_B:
1175 case OPC_CLZ_S:
1176 case OPC_SIGN_F:
1177 case OPC_FLOOR_F:
1178 case OPC_CEIL_F:
1179 case OPC_RNDNE_F:
1180 case OPC_RNDAZ_F:
1181 case OPC_TRUNC_F:
1182 case OPC_NOT_B:
1183 case OPC_BFREV_B:
1184 case OPC_SETRM:
1185 case OPC_CBITS_B:
1186 /* these only have one src reg */
1187 instr = instr_create(ctx, 2, t->opc);
1188 vectorize(ctx, instr, dst, 1, src0, src0_flags);
1189 break;
1190 default:
1191 if (is_const(src0) && is_const(src1))
1192 src0 = get_unconst(ctx, src0);
1193
1194 instr = instr_create(ctx, 2, t->opc);
1195 vectorize(ctx, instr, dst, 2, src0, src0_flags,
1196 src1, src1_flags);
1197 break;
1198 }
1199
1200 put_dst(ctx, inst, dst);
1201 }
1202
1203 static void
1204 instr_cat3(const struct instr_translater *t,
1205 struct ir3_compile_context *ctx,
1206 struct tgsi_full_instruction *inst)
1207 {
1208 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1209 struct tgsi_src_register *src0 = &inst->Src[0].Register;
1210 struct tgsi_src_register *src1 = &inst->Src[1].Register;
1211 struct ir3_instruction *instr;
1212
1213 /* in particular, can't handle const for src1 for cat3..
1214 * for mad, we can swap first two src's if needed:
1215 */
1216 if (is_rel_or_const(src1)) {
1217 if (is_mad(t->opc) && !is_rel_or_const(src0)) {
1218 struct tgsi_src_register *tmp;
1219 tmp = src0;
1220 src0 = src1;
1221 src1 = tmp;
1222 } else {
1223 src1 = get_unconst(ctx, src1);
1224 }
1225 }
1226
1227 instr = instr_create(ctx, 3,
1228 ctx->so->key.half_precision ? t->hopc : t->opc);
1229 vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
1230 &inst->Src[2].Register, 0);
1231 put_dst(ctx, inst, dst);
1232 }
1233
1234 static void
1235 instr_cat4(const struct instr_translater *t,
1236 struct ir3_compile_context *ctx,
1237 struct tgsi_full_instruction *inst)
1238 {
1239 struct tgsi_dst_register *dst = get_dst(ctx, inst);
1240 struct tgsi_src_register *src = &inst->Src[0].Register;
1241 struct ir3_instruction *instr;
1242 unsigned i, n;
1243
1244 /* seems like blob compiler avoids const as src.. */
1245 if (is_const(src))
1246 src = get_unconst(ctx, src);
1247
1248 /* worst case: */
1249 add_nop(ctx, 6);
1250
1251 /* we need to replicate into each component: */
1252 for (i = 0, n = 0; i < 4; i++) {
1253 if (dst->WriteMask & (1 << i)) {
1254 if (n++)
1255 add_nop(ctx, 1);
1256 instr = instr_create(ctx, 4, t->opc);
1257 add_dst_reg(ctx, instr, dst, i);
1258 add_src_reg(ctx, instr, src, src->SwizzleX);
1259 }
1260 }
1261
1262 regmask_set(&ctx->needs_ss, instr->regs[0]);
1263 put_dst(ctx, inst, dst);
1264 }
1265
/* Table mapping TGSI opcodes to their translation handler plus the
 * native opcode(s) the handler should emit.  Opcodes not listed here
 * are zero-initialized, so t->fxn is NULL and compile_instructions()
 * reports them as unsupported.
 */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
		[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }

	INSTR(MOV, instr_cat1),
	INSTR(RCP, instr_cat4, .opc = OPC_RCP),
	INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
	INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),
	INSTR(MUL, instr_cat2, .opc = OPC_MUL_F),
	INSTR(ADD, instr_cat2, .opc = OPC_ADD_F),
	INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
	INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
	INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
	INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
	INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
	INSTR(CLAMP, trans_clamp),
	INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
	INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F),
	INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F),
	INSTR(ARL, trans_arl),
	INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
	INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
	INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F),
	INSTR(COS, instr_cat4, .opc = OPC_COS),
	INSTR(SIN, instr_cat4, .opc = OPC_SIN),
	INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
	INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
	INSTR(SGT, trans_cmp),
	INSTR(SLT, trans_cmp),
	INSTR(SGE, trans_cmp),
	INSTR(SLE, trans_cmp),
	INSTR(SNE, trans_cmp),
	INSTR(SEQ, trans_cmp),
	INSTR(CMP, trans_cmp),
	INSTR(IF, trans_if),
	INSTR(ELSE, trans_else),
	INSTR(ENDIF, trans_endif),
	INSTR(END, instr_cat0, .opc = OPC_END),
	INSTR(KILL, instr_cat0, .opc = OPC_KILL),
};
1306
/* Pack a TGSI declaration's semantic name+index into the ir3_semantic
 * encoding used in the shader variant's input/output tables.
 */
static ir3_semantic
decl_semantic(const struct tgsi_declaration_semantic *sem)
{
	return ir3_semantic_name(sem->Name, sem->Index);
}
1312
/* Handle a TGSI input declaration: record each declared register in
 * so->inputs[], and for fragment shaders also emit the bary.f
 * instructions that interpolate the input components.  Returns the
 * number of nop cycles the caller should insert before the first
 * instruction consuming the interpolated values (6 for frag shader
 * inputs, 0 otherwise).
 */
static int
decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	struct ir3_shader_variant *so = ctx->so;
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
	unsigned i, flags = 0;
	int nop = 0;

	/* I don't think we should get frag shader input without
	 * semantic info? Otherwise how do inputs get linked to
	 * vert outputs?
	 */
	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
			decl->Declaration.Semantic);

	if (ctx->so->key.half_precision)
		flags |= IR3_REG_HALF;

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->inputs_count++;
		unsigned r = regid(i + base, 0);
		unsigned ncomp;

		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
		ncomp = 4;

		DBG("decl in -> r%d", i + base); // XXX

		compile_assert(ctx, n < ARRAY_SIZE(so->inputs));

		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
		so->inputs[n].compmask = (1 << ncomp) - 1;
		so->inputs[n].ncomp = ncomp;
		so->inputs[n].regid = r;
		so->inputs[n].inloc = ctx->next_inloc;
		so->inputs[n].bary = true; /* all that is supported */
		ctx->next_inloc += ncomp;

		so->total_in += ncomp;

		/* for frag shaders, we need to generate the corresponding bary instr: */
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			unsigned j;

			/* one bary.f per input component: */
			for (j = 0; j < ncomp; j++) {
				struct ir3_instruction *instr;
				struct ir3_register *dst;

				instr = instr_create(ctx, 2, OPC_BARY_F);

				/* dst register: */
				dst = ir3_reg_create(instr, r + j, flags);
				/* remembered so the last one can get IR3_REG_EI set: */
				ctx->last_input = dst;

				/* input position: */
				/* NOTE(review): inloc appears biased by 8 here —
				 * confirm against the hw varying-fetch setup:
				 */
				ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val =
						so->inputs[n].inloc + j - 8;

				/* input base (always r0.xy): */
				ir3_reg_create(instr, regid(0,0), 0)->wrmask = 0x3;
			}

			nop = 6;
		}
	}

	return nop;
}
1381
1382 static void
1383 decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
1384 {
1385 struct ir3_shader_variant *so = ctx->so;
1386 unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
1387 unsigned comp = 0;
1388 unsigned name = decl->Semantic.Name;
1389 unsigned i;
1390
1391 compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true?
1392
1393 DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX
1394
1395 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
1396 switch (name) {
1397 case TGSI_SEMANTIC_POSITION:
1398 so->writes_pos = true;
1399 break;
1400 case TGSI_SEMANTIC_PSIZE:
1401 so->writes_psize = true;
1402 break;
1403 case TGSI_SEMANTIC_COLOR:
1404 case TGSI_SEMANTIC_BCOLOR:
1405 case TGSI_SEMANTIC_GENERIC:
1406 case TGSI_SEMANTIC_FOG:
1407 case TGSI_SEMANTIC_TEXCOORD:
1408 break;
1409 default:
1410 compile_error(ctx, "unknown VS semantic name: %s\n",
1411 tgsi_semantic_names[name]);
1412 }
1413 } else {
1414 switch (name) {
1415 case TGSI_SEMANTIC_POSITION:
1416 comp = 2; /* tgsi will write to .z component */
1417 so->writes_pos = true;
1418 break;
1419 case TGSI_SEMANTIC_COLOR:
1420 break;
1421 default:
1422 compile_error(ctx, "unknown FS semantic name: %s\n",
1423 tgsi_semantic_names[name]);
1424 }
1425 }
1426
1427 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
1428 unsigned n = so->outputs_count++;
1429 compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
1430 so->outputs[n].semantic = decl_semantic(&decl->Semantic);
1431 so->outputs[n].regid = regid(i + base, comp);
1432 }
1433 }
1434
/* Handle a TGSI sampler declaration: just note that the shader uses
 * samplers at all.
 */
static void
decl_samp(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
	ctx->so->has_samp = true;
}
1440
/* Main translation loop: walk the TGSI token stream, dispatching
 * declarations, immediates and instructions, then apply the final
 * fixups (sync flags on the first instruction, end-of-input flag on
 * the last bary.f dst, pending relative-addressing cleanup).
 */
static void
compile_instructions(struct ir3_compile_context *ctx)
{
	struct ir3 *ir = ctx->ir;
	/* nops owed before the next ALU instruction (set by decl_in for
	 * frag-shader inputs):
	 */
	int nop = 0;

	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
		tgsi_parse_token(&ctx->parser);

		switch (ctx->parser.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION: {
			struct tgsi_full_declaration *decl =
					&ctx->parser.FullToken.FullDeclaration;
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				nop = decl_in(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
				decl_samp(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			/* copy all four 32-bit components of the immediate: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			/* flush any nops owed from a preceding input decl: */
			add_nop(ctx, nop);
			nop = 0;

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				/* temps borrowed by get_unconst() are per-instruction: */
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* apply any saturate modifier as an explicit clamp: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			break;
		}
		default:
			break;
		}
	}

	/* first instruction must wait on all outstanding loads/sync: */
	if (ir->instrs_count > 0)
		ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;

	/* mark the last interpolated input as end-of-input: */
	if (ctx->last_input)
		ctx->last_input->flags |= IR3_REG_EI;

	handle_last_rel(ctx);
}
1517
/* Entry point of the old TGSI->ir3 compiler: create the ir for the
 * shader variant and translate the token stream into it.  Returns 0
 * on success, -1 if the tgsi parser could not be initialized.
 *
 * NOTE(review): on the error path, so->ir stays allocated and set —
 * looks like the caller is expected to clean up; confirm.
 */
int
ir3_compile_shader_old(struct ir3_shader_variant *so,
		const struct tgsi_token *tokens, struct ir3_shader_key key)
{
	struct ir3_compile_context ctx;

	/* the variant must not already have been compiled: */
	assert(!so->ir);

	so->ir = ir3_create();

	assert(so->ir);

	if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
		return -1;

	compile_instructions(&ctx);

	compile_free(&ctx);

	return 0;
}