freedreno: a2xx: ir2 update
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_compiler.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "tgsi/tgsi_parse.h"
34 #include "tgsi/tgsi_ureg.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "tgsi/tgsi_dump.h"
38
39 #include "fd2_compiler.h"
40 #include "fd2_program.h"
41 #include "fd2_util.h"
42
43 #include "instr-a2xx.h"
44 #include "ir-a2xx.h"
45
46 struct fd2_compile_context {
47 struct fd_program_stateobj *prog;
48 struct fd2_shader_stateobj *so;
49
50 struct tgsi_parse_context parser;
51 unsigned type;
52
53 /* predicate stack: */
54 int pred_depth;
55 enum ir2_pred pred_stack[8];
56
57 /* Internal-Temporary and Predicate register assignment:
58 *
59 * Some TGSI instructions which translate into multiple actual
60 * instructions need one or more temporary registers, which are not
61 * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
62 * And some instructions (texture fetch) cannot write directly to
63 * output registers. We could be more clever and re-use dst or a
64 * src register in some cases. But for now don't try to be clever.
65 * Eventually we should implement an optimization pass that re-
66 * juggles the register usage and gets rid of unneeded temporaries.
67 *
68 * The predicate register must be valid across multiple TGSI
69 * instructions, but internal temporary's do not. For this reason,
70 * once the predicate register is requested, until it is no longer
71 * needed, it gets the first register slot after after the TGSI
72 * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
73 * internal temporaries get the register slots above this.
74 */
75
76 int pred_reg;
77 int num_internal_temps;
78
79 uint8_t num_regs[TGSI_FILE_COUNT];
80
81 /* maps input register idx to prog->export_linkage idx: */
82 uint8_t input_export_idx[64];
83
84 /* maps output register idx to prog->export_linkage idx: */
85 uint8_t output_export_idx[64];
86
87 /* idx/slot for last compiler generated immediate */
88 unsigned immediate_idx;
89
90 // TODO we can skip emit exports in the VS that the FS doesn't need..
91 // and get rid perhaps of num_param..
92 unsigned num_position, num_param;
93 unsigned position, psize;
94
95 uint64_t need_sync;
96 };
97
98 static int
99 semantic_idx(struct tgsi_declaration_semantic *semantic)
100 {
101 int idx = semantic->Name;
102 if (idx == TGSI_SEMANTIC_GENERIC)
103 idx = TGSI_SEMANTIC_COUNT + semantic->Index;
104 return idx;
105 }
106
107 /* assign/get the input/export register # for given semantic idx as
108 * returned by semantic_idx():
109 */
110 static int
111 export_linkage(struct fd2_compile_context *ctx, int idx)
112 {
113 struct fd_program_stateobj *prog = ctx->prog;
114
115 /* if first time we've seen this export, assign the next available slot: */
116 if (prog->export_linkage[idx] == 0xff)
117 prog->export_linkage[idx] = prog->num_exports++;
118
119 return prog->export_linkage[idx];
120 }
121
122 static unsigned
123 compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
124 struct fd2_shader_stateobj *so)
125 {
126 unsigned ret;
127
128 ctx->prog = prog;
129 ctx->so = so;
130 ctx->pred_depth = 0;
131
132 ret = tgsi_parse_init(&ctx->parser, so->tokens);
133 if (ret != TGSI_PARSE_OK)
134 return ret;
135
136 ctx->type = ctx->parser.FullHeader.Processor.Processor;
137 ctx->position = ~0;
138 ctx->psize = ~0;
139 ctx->num_position = 0;
140 ctx->num_param = 0;
141 ctx->need_sync = 0;
142 ctx->immediate_idx = 0;
143 ctx->pred_reg = -1;
144 ctx->num_internal_temps = 0;
145
146 memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
147 memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
148 memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
149
150 /* do first pass to extract declarations: */
151 while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
152 tgsi_parse_token(&ctx->parser);
153
154 switch (ctx->parser.FullToken.Token.Type) {
155 case TGSI_TOKEN_TYPE_DECLARATION: {
156 struct tgsi_full_declaration *decl =
157 &ctx->parser.FullToken.FullDeclaration;
158 if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
159 unsigned name = decl->Semantic.Name;
160
161 assert(decl->Declaration.Semantic); // TODO is this ever not true?
162
163 ctx->output_export_idx[decl->Range.First] =
164 semantic_idx(&decl->Semantic);
165
166 if (ctx->type == PIPE_SHADER_VERTEX) {
167 switch (name) {
168 case TGSI_SEMANTIC_POSITION:
169 ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
170 ctx->num_position++;
171 break;
172 case TGSI_SEMANTIC_PSIZE:
173 ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
174 ctx->num_position++;
175 break;
176 case TGSI_SEMANTIC_COLOR:
177 case TGSI_SEMANTIC_GENERIC:
178 ctx->num_param++;
179 break;
180 default:
181 DBG("unknown VS semantic name: %s",
182 tgsi_semantic_names[name]);
183 assert(0);
184 }
185 } else {
186 switch (name) {
187 case TGSI_SEMANTIC_COLOR:
188 case TGSI_SEMANTIC_GENERIC:
189 ctx->num_param++;
190 break;
191 default:
192 DBG("unknown PS semantic name: %s",
193 tgsi_semantic_names[name]);
194 assert(0);
195 }
196 }
197 } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
198 ctx->input_export_idx[decl->Range.First] =
199 semantic_idx(&decl->Semantic);
200 }
201 ctx->num_regs[decl->Declaration.File] =
202 MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
203 break;
204 }
205 case TGSI_TOKEN_TYPE_IMMEDIATE: {
206 struct tgsi_full_immediate *imm =
207 &ctx->parser.FullToken.FullImmediate;
208 unsigned n = ctx->so->num_immediates++;
209 memcpy(ctx->so->immediates[n].val, imm->u, 16);
210 break;
211 }
212 default:
213 break;
214 }
215 }
216
217 /* TGSI generated immediates are always entire vec4's, ones we
218 * generate internally are not:
219 */
220 ctx->immediate_idx = ctx->so->num_immediates * 4;
221
222 ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
223
224 tgsi_parse_free(&ctx->parser);
225
226 return tgsi_parse_init(&ctx->parser, so->tokens);
227 }
228
229 static void
230 compile_free(struct fd2_compile_context *ctx)
231 {
232 tgsi_parse_free(&ctx->parser);
233 }
234
235 static void
236 compile_vtx_fetch(struct fd2_compile_context *ctx)
237 {
238 struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
239 int i;
240 for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
241 struct ir2_instruction *instr = ir2_instr_create(
242 ctx->so->ir, IR2_FETCH);
243 instr->fetch.opc = VTX_FETCH;
244
245 ctx->need_sync |= 1 << (i+1);
246
247 ir2_dst_create(instr, i+1, "xyzw", 0);
248 ir2_reg_create(instr, 0, "x", IR2_REG_INPUT);
249
250 if (i == 0)
251 instr->sync = true;
252
253 vfetch_instrs[i] = instr;
254 }
255 ctx->so->num_vfetch_instrs = i;
256 }
257
258 /*
259 * For vertex shaders (VS):
260 * --- ------ -------------
261 *
262 * Inputs: R1-R(num_input)
263 * Constants: C0-C(num_const-1)
264 * Immediates: C(num_const)-C(num_const+num_imm-1)
265 * Outputs: export0-export(n) and export62, export63
266 * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
267 * Temps: R(num_input+1)-R(num_input+num_temps)
268 *
269 * R0 could be clobbered after the vertex fetch instructions.. so we
270 * could use it for one of the temporaries.
271 *
272 * TODO: maybe the vertex fetch part could fetch first input into R0 as
273 * the last vtx fetch instruction, which would let us use the same
274 * register layout in either case.. although this is not what the blob
275 * compiler does.
276 *
277 *
278 * For frag shaders (PS):
279 * --- ---- -------------
280 *
281 * Inputs: R0-R(num_input-1)
282 * Constants: same as VS
283 * Immediates: same as VS
284 * Outputs: export0-export(num_outputs)
285 * Temps: R(num_input)-R(num_input+num_temps-1)
286 *
287 * In either case, immediates are appended to the constants
288 * (uniforms).
289 *
290 */
291
292 static unsigned
293 get_temp_gpr(struct fd2_compile_context *ctx, int idx)
294 {
295 unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT];
296 if (ctx->type == PIPE_SHADER_VERTEX)
297 num++;
298 return num;
299 }
300
301 static struct ir2_dst_register *
302 add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
303 const struct tgsi_dst_register *dst)
304 {
305 unsigned flags = 0, num = 0;
306 char swiz[5];
307
308 switch (dst->File) {
309 case TGSI_FILE_OUTPUT:
310 flags |= IR2_REG_EXPORT;
311 if (ctx->type == PIPE_SHADER_VERTEX) {
312 if (dst->Index == ctx->position) {
313 num = 62;
314 } else if (dst->Index == ctx->psize) {
315 num = 63;
316 } else {
317 num = export_linkage(ctx,
318 ctx->output_export_idx[dst->Index]);
319 }
320 } else {
321 num = dst->Index;
322 }
323 break;
324 case TGSI_FILE_TEMPORARY:
325 num = get_temp_gpr(ctx, dst->Index);
326 break;
327 default:
328 DBG("unsupported dst register file: %s",
329 tgsi_file_name(dst->File));
330 assert(0);
331 break;
332 }
333
334 swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
335 swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
336 swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
337 swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
338 swiz[4] = '\0';
339
340 return ir2_dst_create(alu, num, swiz, flags);
341 }
342
343 static struct ir2_src_register *
344 add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
345 const struct tgsi_src_register *src)
346 {
347 static const char swiz_vals[] = {
348 'x', 'y', 'z', 'w',
349 };
350 char swiz[5];
351 unsigned flags = 0, num = 0;
352
353 switch (src->File) {
354 case TGSI_FILE_CONSTANT:
355 num = src->Index;
356 flags |= IR2_REG_CONST;
357 break;
358 case TGSI_FILE_INPUT:
359 if (ctx->type == PIPE_SHADER_VERTEX) {
360 num = src->Index + 1;
361 } else {
362 flags |= IR2_REG_INPUT;
363 num = export_linkage(ctx,
364 ctx->input_export_idx[src->Index]);
365 }
366 break;
367 case TGSI_FILE_TEMPORARY:
368 num = get_temp_gpr(ctx, src->Index);
369 break;
370 case TGSI_FILE_IMMEDIATE:
371 num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
372 flags |= IR2_REG_CONST;
373 break;
374 default:
375 DBG("unsupported src register file: %s",
376 tgsi_file_name(src->File));
377 assert(0);
378 break;
379 }
380
381 if (src->Absolute)
382 flags |= IR2_REG_ABS;
383 if (src->Negate)
384 flags |= IR2_REG_NEGATE;
385
386 swiz[0] = swiz_vals[src->SwizzleX];
387 swiz[1] = swiz_vals[src->SwizzleY];
388 swiz[2] = swiz_vals[src->SwizzleZ];
389 swiz[3] = swiz_vals[src->SwizzleW];
390 swiz[4] = '\0';
391
392 if ((ctx->need_sync & ((uint64_t)1 << num)) &&
393 !(flags & IR2_REG_CONST)) {
394 alu->sync = true;
395 ctx->need_sync &= ~((uint64_t)1 << num);
396 }
397
398 return ir2_reg_create(alu, num, swiz, flags);
399 }
400
401 static void
402 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
403 {
404 if (inst->Instruction.Saturate) {
405 alu->alu_vector.clamp = true;
406 }
407 }
408
409 static void
410 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
411 {
412 if (inst->Instruction.Saturate) {
413 alu->alu_scalar.clamp = true;
414 }
415 }
416
417 static void
418 add_regs_vector_1(struct fd2_compile_context *ctx,
419 struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
420 {
421 assert(inst->Instruction.NumSrcRegs == 1);
422 assert(inst->Instruction.NumDstRegs == 1);
423
424 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
425 add_src_reg(ctx, alu, &inst->Src[0].Register);
426 add_src_reg(ctx, alu, &inst->Src[0].Register);
427 add_vector_clamp(inst, alu);
428 }
429
430 static void
431 add_regs_vector_2(struct fd2_compile_context *ctx,
432 struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
433 {
434 assert(inst->Instruction.NumSrcRegs == 2);
435 assert(inst->Instruction.NumDstRegs == 1);
436
437 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
438 add_src_reg(ctx, alu, &inst->Src[0].Register);
439 add_src_reg(ctx, alu, &inst->Src[1].Register);
440 add_vector_clamp(inst, alu);
441 }
442
443 static void
444 add_regs_vector_3(struct fd2_compile_context *ctx,
445 struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
446 {
447 assert(inst->Instruction.NumSrcRegs == 3);
448 assert(inst->Instruction.NumDstRegs == 1);
449
450 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
451 add_src_reg(ctx, alu, &inst->Src[0].Register);
452 add_src_reg(ctx, alu, &inst->Src[1].Register);
453 add_src_reg(ctx, alu, &inst->Src[2].Register);
454 add_vector_clamp(inst, alu);
455 }
456
457 static void
458 add_regs_scalar_1(struct fd2_compile_context *ctx,
459 struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
460 {
461 assert(inst->Instruction.NumSrcRegs == 1);
462 assert(inst->Instruction.NumDstRegs == 1);
463
464 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
465 add_src_reg(ctx, alu, &inst->Src[0].Register);
466 add_scalar_clamp(inst, alu);
467 }
468
469 /*
470 * Helpers for TGSI instructions that don't map to a single shader instr:
471 */
472
473 static void
474 src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
475 {
476 src->File = dst->File;
477 src->Indirect = dst->Indirect;
478 src->Dimension = dst->Dimension;
479 src->Index = dst->Index;
480 src->Absolute = 0;
481 src->Negate = 0;
482 src->SwizzleX = TGSI_SWIZZLE_X;
483 src->SwizzleY = TGSI_SWIZZLE_Y;
484 src->SwizzleZ = TGSI_SWIZZLE_Z;
485 src->SwizzleW = TGSI_SWIZZLE_W;
486 }
487
488 /* Get internal-temp src/dst to use for a sequence of instructions
489 * generated by a single TGSI op.
490 */
491 static void
492 get_internal_temp(struct fd2_compile_context *ctx,
493 struct tgsi_dst_register *tmp_dst,
494 struct tgsi_src_register *tmp_src)
495 {
496 int n;
497
498 tmp_dst->File = TGSI_FILE_TEMPORARY;
499 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
500 tmp_dst->Indirect = 0;
501 tmp_dst->Dimension = 0;
502
503 /* assign next temporary: */
504 n = ctx->num_internal_temps++;
505 if (ctx->pred_reg != -1)
506 n++;
507
508 tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n;
509
510 src_from_dst(tmp_src, tmp_dst);
511 }
512
513 static void
514 get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst,
515 struct tgsi_src_register *src)
516 {
517 assert(ctx->pred_reg != -1);
518
519 dst->File = TGSI_FILE_TEMPORARY;
520 dst->WriteMask = TGSI_WRITEMASK_W;
521 dst->Indirect = 0;
522 dst->Dimension = 0;
523 dst->Index = get_temp_gpr(ctx, ctx->pred_reg);
524
525 if (src) {
526 src_from_dst(src, dst);
527 src->SwizzleX = TGSI_SWIZZLE_W;
528 src->SwizzleY = TGSI_SWIZZLE_W;
529 src->SwizzleZ = TGSI_SWIZZLE_W;
530 src->SwizzleW = TGSI_SWIZZLE_W;
531 }
532 }
533
534 static void
535 push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
536 {
537 struct ir2_instruction *alu;
538 struct tgsi_dst_register pred_dst;
539
540 if (ctx->pred_depth == 0) {
541 /* assign predicate register: */
542 ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
543
544 get_predicate(ctx, &pred_dst, NULL);
545
546 alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs);
547 add_dst_reg(ctx, alu, &pred_dst);
548 add_src_reg(ctx, alu, src);
549 } else {
550 struct tgsi_src_register pred_src;
551
552 get_predicate(ctx, &pred_dst, &pred_src);
553
554 alu = ir2_instr_create_alu_v(ctx->so->ir, MULv);
555 add_dst_reg(ctx, alu, &pred_dst);
556 add_src_reg(ctx, alu, &pred_src);
557 add_src_reg(ctx, alu, src);
558
559 // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
560 // sure src reg is valid if it was calculated with a predicate
561 // condition..
562 alu->pred = IR2_PRED_NONE;
563 }
564
565 /* save previous pred state to restore in pop_predicate(): */
566 ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
567 }
568
569 static void
570 pop_predicate(struct fd2_compile_context *ctx)
571 {
572 /* restore previous predicate state: */
573 ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
574
575 if (ctx->pred_depth != 0) {
576 struct ir2_instruction *alu;
577 struct tgsi_dst_register pred_dst;
578 struct tgsi_src_register pred_src;
579
580 get_predicate(ctx, &pred_dst, &pred_src);
581
582 alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs);
583 add_dst_reg(ctx, alu, &pred_dst);
584 add_src_reg(ctx, alu, &pred_src);
585 alu->pred = IR2_PRED_NONE;
586 } else {
587 /* predicate register no longer needed: */
588 ctx->pred_reg = -1;
589 }
590 }
591
592 static void
593 get_immediate(struct fd2_compile_context *ctx,
594 struct tgsi_src_register *reg, uint32_t val)
595 {
596 unsigned neg, swiz, idx, i;
597 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
598 static const unsigned swiz2tgsi[] = {
599 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
600 };
601
602 for (i = 0; i < ctx->immediate_idx; i++) {
603 swiz = i % 4;
604 idx = i / 4;
605
606 if (ctx->so->immediates[idx].val[swiz] == val) {
607 neg = 0;
608 break;
609 }
610
611 if (ctx->so->immediates[idx].val[swiz] == -val) {
612 neg = 1;
613 break;
614 }
615 }
616
617 if (i == ctx->immediate_idx) {
618 /* need to generate a new immediate: */
619 swiz = i % 4;
620 idx = i / 4;
621 neg = 0;
622 ctx->so->immediates[idx].val[swiz] = val;
623 ctx->so->num_immediates = idx + 1;
624 ctx->immediate_idx++;
625 }
626
627 reg->File = TGSI_FILE_IMMEDIATE;
628 reg->Indirect = 0;
629 reg->Dimension = 0;
630 reg->Index = idx;
631 reg->Absolute = 0;
632 reg->Negate = neg;
633 reg->SwizzleX = swiz2tgsi[swiz];
634 reg->SwizzleY = swiz2tgsi[swiz];
635 reg->SwizzleZ = swiz2tgsi[swiz];
636 reg->SwizzleW = swiz2tgsi[swiz];
637 }
638
639 /* POW(a,b) = EXP2(b * LOG2(a)) */
640 static void
641 translate_pow(struct fd2_compile_context *ctx,
642 struct tgsi_full_instruction *inst)
643 {
644 struct tgsi_dst_register tmp_dst;
645 struct tgsi_src_register tmp_src;
646 struct ir2_instruction *alu;
647
648 get_internal_temp(ctx, &tmp_dst, &tmp_src);
649
650 alu = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP);
651 add_dst_reg(ctx, alu, &tmp_dst);
652 add_src_reg(ctx, alu, &inst->Src[0].Register);
653
654 alu = ir2_instr_create_alu_v(ctx->so->ir, MULv);
655 add_dst_reg(ctx, alu, &tmp_dst);
656 add_src_reg(ctx, alu, &tmp_src);
657 add_src_reg(ctx, alu, &inst->Src[1].Register);
658
659 /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
660 * coded to take their input from the w component.
661 */
662 switch(inst->Dst[0].Register.WriteMask) {
663 case TGSI_WRITEMASK_X:
664 tmp_src.SwizzleW = TGSI_SWIZZLE_X;
665 break;
666 case TGSI_WRITEMASK_Y:
667 tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
668 break;
669 case TGSI_WRITEMASK_Z:
670 tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
671 break;
672 case TGSI_WRITEMASK_W:
673 tmp_src.SwizzleW = TGSI_SWIZZLE_W;
674 break;
675 default:
676 DBG("invalid writemask!");
677 assert(0);
678 break;
679 }
680
681 alu = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE);
682 add_dst_reg(ctx, alu, &inst->Dst[0].Register);
683 add_src_reg(ctx, alu, &tmp_src);
684 add_scalar_clamp(inst, alu);
685 }
686
687 static void
688 translate_tex(struct fd2_compile_context *ctx,
689 struct tgsi_full_instruction *inst, unsigned opc)
690 {
691 struct ir2_instruction *instr;
692 struct ir2_src_register *reg;
693 struct tgsi_dst_register tmp_dst;
694 struct tgsi_src_register tmp_src;
695 const struct tgsi_src_register *coord;
696 bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
697 inst->Instruction.Saturate;
698 int idx;
699
700 if (using_temp || (opc == TGSI_OPCODE_TXP))
701 get_internal_temp(ctx, &tmp_dst, &tmp_src);
702
703 if (opc == TGSI_OPCODE_TXP) {
704 static const char *swiz[] = {
705 [TGSI_SWIZZLE_X] = "xxxx",
706 [TGSI_SWIZZLE_Y] = "yyyy",
707 [TGSI_SWIZZLE_Z] = "zzzz",
708 [TGSI_SWIZZLE_W] = "wwww",
709 };
710
711 /* TXP - Projective Texture Lookup:
712 *
713 * coord.x = src0.x / src.w
714 * coord.y = src0.y / src.w
715 * coord.z = src0.z / src.w
716 * coord.w = src0.w
717 * bias = 0.0
718 *
719 * dst = texture_sample(unit, coord, bias)
720 */
721
722 instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
723 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
724 add_src_reg(ctx, instr, &inst->Src[0].Register);
725 add_src_reg(ctx, instr, &inst->Src[0].Register);
726
727 instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE);
728 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
729 add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle =
730 swiz[inst->Src[0].Register.SwizzleW];
731
732 instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
733 add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
734 add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
735 add_src_reg(ctx, instr, &inst->Src[0].Register);
736
737 coord = &tmp_src;
738 } else {
739 coord = &inst->Src[0].Register;
740 }
741
742 instr = ir2_instr_create(ctx->so->ir, IR2_FETCH);
743 instr->fetch.opc = TEX_FETCH;
744 instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D);
745 instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT);
746 assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
747
748 /* save off the tex fetch to be patched later with correct const_idx: */
749 idx = ctx->so->num_tfetch_instrs++;
750 ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
751 ctx->so->tfetch_instrs[idx].instr = instr;
752
753 add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
754 reg = add_src_reg(ctx, instr, coord);
755
756 /* blob compiler always sets 3rd component to same as 1st for 2d: */
757 if (inst->Texture.Texture == TGSI_TEXTURE_2D || inst->Texture.Texture == TGSI_TEXTURE_RECT)
758 reg->swizzle[2] = reg->swizzle[0];
759
760 /* dst register needs to be marked for sync: */
761 ctx->need_sync |= 1 << instr->dst_reg.num;
762
763 /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
764 instr->sync = true;
765
766 if (using_temp) {
767 /* texture fetch can't write directly to export, so if tgsi
768 * is telling us the dst register is in output file, we load
769 * the texture to a temp and the use ALU instruction to move
770 * to output
771 */
772 instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
773
774 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
775 add_src_reg(ctx, instr, &tmp_src);
776 add_src_reg(ctx, instr, &tmp_src);
777 add_vector_clamp(inst, instr);
778 }
779 }
780
781 /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
782 /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
783 /* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */
784 /* SNE(a,b) = EQU((b - a), 0.0, 1.0) */
785 static void
786 translate_sge_slt_seq_sne(struct fd2_compile_context *ctx,
787 struct tgsi_full_instruction *inst, unsigned opc)
788 {
789 struct ir2_instruction *instr;
790 struct tgsi_dst_register tmp_dst;
791 struct tgsi_src_register tmp_src;
792 struct tgsi_src_register tmp_const;
793 float c0, c1;
794 instr_vector_opc_t vopc;
795
796 switch (opc) {
797 default:
798 assert(0);
799 case TGSI_OPCODE_SGE:
800 c0 = 1.0;
801 c1 = 0.0;
802 vopc = CNDGTEv;
803 break;
804 case TGSI_OPCODE_SLT:
805 c0 = 0.0;
806 c1 = 1.0;
807 vopc = CNDGTEv;
808 break;
809 case TGSI_OPCODE_SEQ:
810 c0 = 0.0;
811 c1 = 1.0;
812 vopc = CNDEv;
813 break;
814 case TGSI_OPCODE_SNE:
815 c0 = 1.0;
816 c1 = 0.0;
817 vopc = CNDEv;
818 break;
819 }
820
821 get_internal_temp(ctx, &tmp_dst, &tmp_src);
822
823 instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
824 add_dst_reg(ctx, instr, &tmp_dst);
825 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
826 add_src_reg(ctx, instr, &inst->Src[1].Register);
827
828 instr = ir2_instr_create_alu_v(ctx->so->ir, vopc);
829 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
830 add_src_reg(ctx, instr, &tmp_src);
831 get_immediate(ctx, &tmp_const, fui(c1));
832 add_src_reg(ctx, instr, &tmp_const);
833 get_immediate(ctx, &tmp_const, fui(c0));
834 add_src_reg(ctx, instr, &tmp_const);
835 }
836
837 /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
838 static void
839 translate_lrp(struct fd2_compile_context *ctx,
840 struct tgsi_full_instruction *inst,
841 unsigned opc)
842 {
843 struct ir2_instruction *instr;
844 struct tgsi_dst_register tmp_dst1, tmp_dst2;
845 struct tgsi_src_register tmp_src1, tmp_src2;
846 struct tgsi_src_register tmp_const;
847
848 get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
849 get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
850
851 get_immediate(ctx, &tmp_const, fui(1.0));
852
853 /* tmp1 = (a * b) */
854 instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
855 add_dst_reg(ctx, instr, &tmp_dst1);
856 add_src_reg(ctx, instr, &inst->Src[0].Register);
857 add_src_reg(ctx, instr, &inst->Src[1].Register);
858
859 /* tmp2 = (1 - a) */
860 instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
861 add_dst_reg(ctx, instr, &tmp_dst2);
862 add_src_reg(ctx, instr, &tmp_const);
863 add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
864
865 /* tmp2 = tmp2 * c */
866 instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
867 add_dst_reg(ctx, instr, &tmp_dst2);
868 add_src_reg(ctx, instr, &tmp_src2);
869 add_src_reg(ctx, instr, &inst->Src[2].Register);
870
871 /* dst = tmp1 + tmp2 */
872 instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
873 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
874 add_src_reg(ctx, instr, &tmp_src1);
875 add_src_reg(ctx, instr, &tmp_src2);
876 }
877
878 static void
879 translate_trig(struct fd2_compile_context *ctx,
880 struct tgsi_full_instruction *inst,
881 unsigned opc)
882 {
883 struct ir2_instruction *instr;
884 struct tgsi_dst_register tmp_dst;
885 struct tgsi_src_register tmp_src;
886 struct tgsi_src_register tmp_const;
887 instr_scalar_opc_t op;
888
889 switch (opc) {
890 default:
891 assert(0);
892 case TGSI_OPCODE_SIN:
893 op = SIN;
894 break;
895 case TGSI_OPCODE_COS:
896 op = COS;
897 break;
898 }
899
900 get_internal_temp(ctx, &tmp_dst, &tmp_src);
901
902 tmp_dst.WriteMask = TGSI_WRITEMASK_X;
903 tmp_src.SwizzleX = tmp_src.SwizzleY =
904 tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
905
906 instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
907 add_dst_reg(ctx, instr, &tmp_dst);
908 add_src_reg(ctx, instr, &inst->Src[0].Register);
909 get_immediate(ctx, &tmp_const, fui(0.159155));
910 add_src_reg(ctx, instr, &tmp_const);
911 get_immediate(ctx, &tmp_const, fui(0.5));
912 add_src_reg(ctx, instr, &tmp_const);
913
914 instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv);
915 add_dst_reg(ctx, instr, &tmp_dst);
916 add_src_reg(ctx, instr, &tmp_src);
917 add_src_reg(ctx, instr, &tmp_src);
918
919 instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
920 add_dst_reg(ctx, instr, &tmp_dst);
921 add_src_reg(ctx, instr, &tmp_src);
922 get_immediate(ctx, &tmp_const, fui(6.283185));
923 add_src_reg(ctx, instr, &tmp_const);
924 get_immediate(ctx, &tmp_const, fui(-3.141593));
925 add_src_reg(ctx, instr, &tmp_const);
926
927 instr = ir2_instr_create_alu_s(ctx->so->ir, op);
928 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
929 add_src_reg(ctx, instr, &tmp_src);
930 }
931
932 static void
933 translate_dp2(struct fd2_compile_context *ctx,
934 struct tgsi_full_instruction *inst,
935 unsigned opc)
936 {
937 struct tgsi_src_register tmp_const;
938 struct ir2_instruction *instr;
939 /* DP2ADD c,a,b -> dot2(a,b) + c */
940 /* for c we use the constant 0.0 */
941 instr = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv);
942 add_dst_reg(ctx, instr, &inst->Dst[0].Register);
943 add_src_reg(ctx, instr, &inst->Src[0].Register);
944 add_src_reg(ctx, instr, &inst->Src[1].Register);
945 get_immediate(ctx, &tmp_const, fui(0.0f));
946 add_src_reg(ctx, instr, &tmp_const);
947 add_vector_clamp(inst, instr);
948 }
949
950 /*
951 * Main part of compiler/translator:
952 */
953
954 static void
955 translate_instruction(struct fd2_compile_context *ctx,
956 struct tgsi_full_instruction *inst)
957 {
958 unsigned opc = inst->Instruction.Opcode;
959 struct ir2_instruction *instr;
960
961 if (opc == TGSI_OPCODE_END)
962 return;
963
964 /* TODO turn this into a table: */
965 switch (opc) {
966 case TGSI_OPCODE_MOV:
967 instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
968 add_regs_vector_1(ctx, inst, instr);
969 break;
970 case TGSI_OPCODE_RCP:
971 instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE);
972 add_regs_scalar_1(ctx, inst, instr);
973 break;
974 case TGSI_OPCODE_RSQ:
975 instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE);
976 add_regs_scalar_1(ctx, inst, instr);
977 break;
978 case TGSI_OPCODE_SQRT:
979 instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE);
980 add_regs_scalar_1(ctx, inst, instr);
981 break;
982 case TGSI_OPCODE_MUL:
983 instr = ir2_instr_create_alu_v(ctx->so->ir, MULv);
984 add_regs_vector_2(ctx, inst, instr);
985 break;
986 case TGSI_OPCODE_ADD:
987 instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv);
988 add_regs_vector_2(ctx, inst, instr);
989 break;
990 case TGSI_OPCODE_DP2:
991 translate_dp2(ctx, inst, opc);
992 break;
993 case TGSI_OPCODE_DP3:
994 instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v);
995 add_regs_vector_2(ctx, inst, instr);
996 break;
997 case TGSI_OPCODE_DP4:
998 instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v);
999 add_regs_vector_2(ctx, inst, instr);
1000 break;
1001 case TGSI_OPCODE_MIN:
1002 instr = ir2_instr_create_alu_v(ctx->so->ir, MINv);
1003 add_regs_vector_2(ctx, inst, instr);
1004 break;
1005 case TGSI_OPCODE_MAX:
1006 instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv);
1007 add_regs_vector_2(ctx, inst, instr);
1008 break;
1009 case TGSI_OPCODE_SLT:
1010 case TGSI_OPCODE_SGE:
1011 case TGSI_OPCODE_SEQ:
1012 case TGSI_OPCODE_SNE:
1013 translate_sge_slt_seq_sne(ctx, inst, opc);
1014 break;
1015 case TGSI_OPCODE_MAD:
1016 instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv);
1017 add_regs_vector_3(ctx, inst, instr);
1018 break;
1019 case TGSI_OPCODE_LRP:
1020 translate_lrp(ctx, inst, opc);
1021 break;
1022 case TGSI_OPCODE_FRC:
1023 instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv);
1024 add_regs_vector_1(ctx, inst, instr);
1025 break;
1026 case TGSI_OPCODE_FLR:
1027 instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv);
1028 add_regs_vector_1(ctx, inst, instr);
1029 break;
1030 case TGSI_OPCODE_EX2:
1031 instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE);
1032 add_regs_scalar_1(ctx, inst, instr);
1033 break;
1034 case TGSI_OPCODE_POW:
1035 translate_pow(ctx, inst);
1036 break;
1037 case TGSI_OPCODE_COS:
1038 case TGSI_OPCODE_SIN:
1039 translate_trig(ctx, inst, opc);
1040 break;
1041 case TGSI_OPCODE_TEX:
1042 case TGSI_OPCODE_TXP:
1043 translate_tex(ctx, inst, opc);
1044 break;
1045 case TGSI_OPCODE_CMP:
1046 instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv);
1047 add_regs_vector_3(ctx, inst, instr);
1048 instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */
1049 break;
1050 case TGSI_OPCODE_IF:
1051 push_predicate(ctx, &inst->Src[0].Register);
1052 ctx->so->ir->pred = IR2_PRED_EQ;
1053 break;
1054 case TGSI_OPCODE_ELSE:
1055 ctx->so->ir->pred = IR2_PRED_NE;
1056 break;
1057 case TGSI_OPCODE_ENDIF:
1058 pop_predicate(ctx);
1059 break;
1060 case TGSI_OPCODE_F2I:
1061 instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv);
1062 add_regs_vector_1(ctx, inst, instr);
1063 break;
1064 default:
1065 DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
1066 tgsi_dump(ctx->so->tokens, 0);
1067 assert(0);
1068 break;
1069 }
1070
1071 /* internal temporaries are only valid for the duration of a single
1072 * TGSI instruction:
1073 */
1074 ctx->num_internal_temps = 0;
1075 }
1076
1077 static void
1078 compile_instructions(struct fd2_compile_context *ctx)
1079 {
1080 while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
1081 tgsi_parse_token(&ctx->parser);
1082
1083 switch (ctx->parser.FullToken.Token.Type) {
1084 case TGSI_TOKEN_TYPE_INSTRUCTION:
1085 translate_instruction(ctx,
1086 &ctx->parser.FullToken.FullInstruction);
1087 break;
1088 default:
1089 break;
1090 }
1091 }
1092 }
1093
1094 int
1095 fd2_compile_shader(struct fd_program_stateobj *prog,
1096 struct fd2_shader_stateobj *so)
1097 {
1098 struct fd2_compile_context ctx;
1099
1100 ir2_shader_destroy(so->ir);
1101 so->ir = ir2_shader_create();
1102 so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
1103
1104 if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
1105 return -1;
1106
1107 if (ctx.type == PIPE_SHADER_VERTEX) {
1108 compile_vtx_fetch(&ctx);
1109 } else if (ctx.type == PIPE_SHADER_FRAGMENT) {
1110 prog->num_exports = 0;
1111 memset(prog->export_linkage, 0xff,
1112 sizeof(prog->export_linkage));
1113 }
1114
1115 compile_instructions(&ctx);
1116
1117 compile_free(&ctx);
1118
1119 return 0;
1120 }
1121