etnaviv: remove not needed forward declarations
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_compiler.c
1 /*
2 * Copyright (c) 2012-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27 /* TGSI->Vivante shader ISA conversion */
28
29 /* What does the compiler return (see etna_shader_object)?
30 * 1) instruction data
31 * 2) input-to-temporary mapping (fixed for ps)
32 * *) in case of ps, semantic -> varying id mapping
33 * *) for each varying: number of components used (r, rg, rgb, rgba)
34 * 3) temporary-to-output mapping (in case of vs, fixed for ps)
35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 * 5) immediates base offset, immediates data
37 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 * configure the hw, but useful for error checking
39 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 * (output reg id is enough)
41 *
42 * Empty shaders are not allowed, should always at least generate a NOP. Also
43 * if there is a label at the end of the shader, an extra NOP should be
44 * generated as jump target.
45 *
46 * TODO
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
49 */
50
51 #include "etnaviv_compiler.h"
52
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
59
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73
/* Maximum number of scratch temps usable inside one translated TGSI
 * instruction (see etna_compile_get_inner_temp()) */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant table uploaded as uniforms; the values (2, -1, 4, -4, 1/(2*pi),
 * 0.75, ...) presumably feed the SIN/COS lowering sequence -- TODO confirm
 * at the use site (not visible in this chunk). */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};

/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;  /* 1 if a native register has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* INST_RGROUP_* register group */
   unsigned id : 9;     /* register index within the group */
};

/* Register description: tracks one TGSI-level register and its mapping to
 * a native (hardware) register */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use; /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};

/* Label information structure; inst_idx stays -1 until label_place() */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};

/* nesting scope frame (LOOP, IF, ...) during compilation;
 * only the labels relevant for the frame type are populated
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   struct etna_compile_label *lbl_else;
   struct etna_compile_label *lbl_endif;
   struct etna_compile_label *lbl_loop_bgn;
   struct etna_compile_label *lbl_loop_end;
};

struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};

/* Append 'val' to a growable array; token-pastes arr##_count / arr##_sz, so
 * 'arr' must be an lvalue whose _count/_sz siblings exist (e.g. c->labels).
 * NOTE(review): realloc result overwrites arr directly, so the old block
 * leaks on OOM -- presumably acceptable in this compiler context. */
#define array_insert(arr, val) \
   do { \
      if (arr##_count == arr##_sz) { \
         arr##_sz = MAX2(2 * arr##_sz, 16); \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      } \
      arr[arr##_count++] = val; \
   } while (0)
143
144
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* whether we own 'tokens' and must free them */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations; the per-file 'reg' pointers
    * above point into this flat array (see etna_allocate_decls()) */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data, allocated as vec4-aligned uniform slots */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   /* per-instruction label reference, resolved after code generation */
   struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS];

   /* growable label array, managed via array_insert() */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;
};
200
201 static struct etna_reg_desc *
202 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
203 {
204 return &c->file[dst.File].reg[dst.Index];
205 }
206
207 static struct etna_reg_desc *
208 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
209 {
210 return &c->file[src.File].reg[src.Index];
211 }
212
213 static struct etna_native_reg
214 etna_native_temp(unsigned reg)
215 {
216 return (struct etna_native_reg) {
217 .valid = 1,
218 .rgroup = INST_RGROUP_TEMP,
219 .id = reg
220 };
221 }
222
/** Register allocation **/
/* Sort keys for ordering a register file by use time; DESC variants negate
 * the key so qsort's ascending order yields a descending result. */
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};

/* Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register being ordered */
   int key;                   /* derived from first_use/last_use */
};
236
237 static int
238 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
239 {
240 if (a->key < b->key)
241 return -1;
242
243 if (a->key > b->key)
244 return 1;
245
246 return 0;
247 }
248
249 /* create an index on a register set based on certain criteria. */
250 static int
251 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
252 enum reg_sort_order so)
253 {
254 struct etna_reg_desc *regs = file->reg;
255 int ptr = 0;
256
257 /* pre-populate keys from active registers */
258 for (int idx = 0; idx < file->reg_size; ++idx) {
259 /* only interested in active registers now; will only assign inactive ones
260 * if no space in active ones */
261 if (regs[idx].active) {
262 sorted[ptr].ptr = &regs[idx];
263
264 switch (so) {
265 case FIRST_USE_ASC:
266 sorted[ptr].key = regs[idx].first_use;
267 break;
268 case LAST_USE_ASC:
269 sorted[ptr].key = regs[idx].last_use;
270 break;
271 case FIRST_USE_DESC:
272 sorted[ptr].key = -regs[idx].first_use;
273 break;
274 case LAST_USE_DESC:
275 sorted[ptr].key = -regs[idx].last_use;
276 break;
277 }
278 ptr++;
279 }
280 }
281
282 /* sort index by key */
283 qsort(sorted, ptr, sizeof(struct sort_rec),
284 (int (*)(const void *, const void *))sort_rec_compar);
285
286 return ptr;
287 }
288
289 /* Allocate a new, unused, native temp register */
290 static struct etna_native_reg
291 alloc_new_native_reg(struct etna_compile *c)
292 {
293 assert(c->next_free_native < ETNA_MAX_TEMPS);
294 return etna_native_temp(c->next_free_native++);
295 }
296
297 /* assign TEMPs to native registers */
298 static void
299 assign_temporaries_to_native(struct etna_compile *c,
300 struct etna_compile_file *file)
301 {
302 struct etna_reg_desc *temps = file->reg;
303
304 for (int idx = 0; idx < file->reg_size; ++idx)
305 temps[idx].native = alloc_new_native_reg(c);
306 }
307
/* assign inputs and outputs to temporaries
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible:
 *  - an input can share a temp whose first use begins after the input's
 *    last read;
 *  - an output can share a temp whose last use ends before the output's
 *    first write.
 * Sorting both sets by use time allows a single linear merge to find the
 * compatible pairs. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      /* the temp is consumed whether or not it matched */
      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}
355
356 /* Allocate an immediate with a certain value and return the index. If
357 * there is already an immediate with that value, return that.
358 */
359 static struct etna_inst_src
360 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
361 uint32_t value)
362 {
363 int idx;
364
365 /* Could use a hash table to speed this up */
366 for (idx = 0; idx < c->imm_size; ++idx) {
367 if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
368 break;
369 }
370
371 /* look if there is an unused slot */
372 if (idx == c->imm_size) {
373 for (idx = 0; idx < c->imm_size; ++idx) {
374 if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
375 break;
376 }
377 }
378
379 /* allocate new immediate */
380 if (idx == c->imm_size) {
381 assert(c->imm_size < ETNA_MAX_IMM);
382 idx = c->imm_size++;
383 c->imm_data[idx] = value;
384 c->imm_contents[idx] = contents;
385 }
386
387 /* swizzle so that component with value is returned in all components */
388 idx += c->imm_base;
389 struct etna_inst_src imm_src = {
390 .use = 1,
391 .rgroup = INST_RGROUP_UNIFORM_0,
392 .reg = idx / 4,
393 .swiz = INST_SWIZ_BROADCAST(idx & 3)
394 };
395
396 return imm_src;
397 }
398
/* Allocate (or find) a scalar immediate holding 'value', tagged as a plain
 * constant; returns a broadcast-swizzled uniform source operand. */
static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
}
404
/* Allocate a full vec4 immediate with the given four 32-bit values and
 * contents tag, reusing an existing identical vec4 if one exists.
 * The vec4 is always allocated 4-aligned so it maps to one uniform
 * register with an identity swizzle. */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* search for an existing aligned vec4 with identical contents */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* not found: append a new, 4-aligned vec4 */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   /* imm_base is vec4-aligned, so idx stays register-aligned after offset */
   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}
442
443 static uint32_t
444 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
445 unsigned swiz_idx)
446 {
447 assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
448 unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
449
450 return c->imm_data[idx];
451 }
452
/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that. The float is stored bit-exactly
 * via fui(), so -0.0f and 0.0f get distinct slots.
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}
461
462 static struct etna_inst_src
463 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
464 {
465 uint32_t val[4];
466
467 for (int i = 0; i < 4; i++)
468 val[i] = fui(vec4[i]);
469
470 return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
471 }
472
/* Pass -- check register file declarations and immediates.
 * Walks the token stream once and copies every TGSI immediate into the
 * compiler's immediate area; other token types are handled by later passes. */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}
505
506 /* Allocate register declarations for the registers in all register files */
507 static void
508 etna_allocate_decls(struct etna_compile *c)
509 {
510 uint idx = 0;
511
512 for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
513 c->file[x].reg = &c->decl[idx];
514 c->file[x].reg_size = c->info.file_max[x] + 1;
515
516 for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
517 c->decl[idx].file = x;
518 c->decl[idx].idx = sub;
519 idx++;
520 }
521 }
522
523 c->total_decls = idx;
524 }
525
/* Pass -- check and record usage of temporaries, inputs, outputs.
 * For every register, mark the first and last instruction index where it is
 * read or written; these live ranges let temporaries be borrowed as input
 * and/or output registers (see assign_inouts_to_temporaries()). */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   /* start with all declarations inactive and no recorded uses */
   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* XXX in the case of loops this needs special care, or must even be
       * completely disabled: the last usage of a register inside a loop
       * means it can still be used on the next iteration (execution is no
       * longer chronological), so the register can only be declared "free"
       * after the loop finishes.
       *
       * Same for inputs: the first usage of a register inside a loop does
       * not mean it wasn't overwritten in a previous iteration, so the
       * register is only free before the loop starts.
       *
       * The proper way would be full dominator / post-dominator analysis
       * (especially with more complicated control flow such as direct
       * branch instructions) but not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register; this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}
616
617 /* assign inputs that need to be assigned to specific registers */
618 static void
619 assign_special_inputs(struct etna_compile *c)
620 {
621 if (c->info.processor == PIPE_SHADER_FRAGMENT) {
622 /* never assign t0 as it is the position output, start assigning at t1 */
623 c->next_free_native = 1;
624
625 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
626 for (int idx = 0; idx < c->total_decls; ++idx) {
627 struct etna_reg_desc *reg = &c->decl[idx];
628
629 if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
630 reg->native = etna_native_temp(0);
631 }
632 }
633 }
634
635 /* Check that a move instruction does not swizzle any of the components
636 * that it writes.
637 */
638 static bool
639 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
640 const struct tgsi_src_register src)
641 {
642 return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
643 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
644 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
645 (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
646 }
647
/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough)
 * Matching MOVs are flagged in c->dead_inst[] and the output aliases the
 * temp's (or a freshly allocated) native register instead.
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction,
             * allocate a new register, and associate both input and output to
             * it
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}
736
737 /* Get a temporary to be used within one TGSI instruction.
738 * The first time that this function is called the temporary will be allocated.
739 * Each call to this function will return the same temporary.
740 */
741 static struct etna_native_reg
742 etna_compile_get_inner_temp(struct etna_compile *c)
743 {
744 int inner_temp = c->inner_temps;
745
746 if (inner_temp < ETNA_MAX_INNER_TEMPS) {
747 if (!c->inner_temp[inner_temp].valid)
748 c->inner_temp[inner_temp] = alloc_new_native_reg(c);
749
750 /* alloc_new_native_reg() handles lack of registers */
751 c->inner_temps += 1;
752 } else {
753 BUG("Too many inner temporaries (%i) requested in one instruction",
754 inner_temp + 1);
755 }
756
757 return c->inner_temp[inner_temp];
758 }
759
760 static struct etna_inst_dst
761 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
762 {
763 /* Can only assign to temporaries */
764 assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
765
766 struct etna_inst_dst rv = {
767 .comps = comps,
768 .use = 1,
769 .reg = native.id,
770 };
771
772 return rv;
773 }
774
775 static struct etna_inst_src
776 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
777 {
778 assert(native.valid && !native.is_tex);
779
780 struct etna_inst_src rv = {
781 .use = 1,
782 .swiz = swizzle,
783 .rgroup = native.rgroup,
784 .reg = native.id,
785 .amode = INST_AMODE_DIRECT,
786 };
787
788 return rv;
789 }
790
791 static inline struct etna_inst_src
792 negate(struct etna_inst_src src)
793 {
794 src.neg = !src.neg;
795
796 return src;
797 }
798
799 static inline struct etna_inst_src
800 absolute(struct etna_inst_src src)
801 {
802 src.abs = 1;
803
804 return src;
805 }
806
807 static inline struct etna_inst_src
808 swizzle(struct etna_inst_src src, unsigned swizzle)
809 {
810 src.swiz = inst_swiz_compose(src.swiz, swizzle);
811
812 return src;
813 }
814
815 /* Emit instruction and append it to program */
816 static void
817 emit_inst(struct etna_compile *c, struct etna_inst *inst)
818 {
819 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
820
821 /* Check for uniform conflicts (each instruction can only access one
822 * uniform),
823 * if detected, use an intermediate temporary */
824 unsigned uni_rgroup = -1;
825 unsigned uni_reg = -1;
826
827 for (int src = 0; src < ETNA_NUM_SRC; ++src) {
828 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
829 if (uni_reg == -1) { /* first unique uniform used */
830 uni_rgroup = inst->src[src].rgroup;
831 uni_reg = inst->src[src].reg;
832 } else { /* second or later; check that it is a re-use */
833 if (uni_rgroup != inst->src[src].rgroup ||
834 uni_reg != inst->src[src].reg) {
835 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
836 "accesses different uniforms, "
837 "need to generate extra MOV");
838 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
839
840 /* Generate move instruction to temporary */
841 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
842 .opcode = INST_OPCODE_MOV,
843 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
844 INST_COMPS_Z | INST_COMPS_W),
845 .src[2] = inst->src[src]
846 });
847
848 c->inst_ptr++;
849
850 /* Modify instruction to use temp register instead of uniform */
851 inst->src[src].use = 1;
852 inst->src[src].rgroup = INST_RGROUP_TEMP;
853 inst->src[src].reg = inner_temp.id;
854 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
855 inst->src[src].neg = 0; /* negation happens on MOV */
856 inst->src[src].abs = 0; /* abs happens on MOV */
857 inst->src[src].amode = 0; /* amode effects happen on MOV */
858 }
859 }
860 }
861 }
862
863 /* Finally assemble the actual instruction */
864 etna_assemble(&c->code[c->inst_ptr * 4], inst);
865 c->inst_ptr++;
866 }
867
868 static unsigned int
869 etna_amode(struct tgsi_ind_register indirect)
870 {
871 assert(indirect.File == TGSI_FILE_ADDRESS);
872 assert(indirect.Index == 0);
873
874 switch (indirect.Swizzle) {
875 case TGSI_SWIZZLE_X:
876 return INST_AMODE_ADD_A_X;
877 case TGSI_SWIZZLE_Y:
878 return INST_AMODE_ADD_A_Y;
879 case TGSI_SWIZZLE_Z:
880 return INST_AMODE_ADD_A_Z;
881 case TGSI_SWIZZLE_W:
882 return INST_AMODE_ADD_A_W;
883 default:
884 assert(!"Invalid swizzle");
885 }
886 }
887
/* convert destination operand from TGSI to native form */
static struct etna_inst_dst
convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
{
   struct etna_inst_dst rv = {
      /// XXX .amode
      .comps = in->Register.WriteMask,
   };

   if (in->Register.File == TGSI_FILE_ADDRESS) {
      /* writes to the address register (for indirect addressing) target the
       * hardware address register rather than a temp; only ADDR[0] exists */
      assert(in->Register.Index == 0);
      rv.reg = in->Register.Index;
      rv.use = 0;
   } else {
      rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
                              in->Register.WriteMask);
   }

   if (in->Register.Indirect)
      rv.amode = etna_amode(in->Indirect);

   return rv;
}
911
912 /* convert texture operand */
913 static struct etna_inst_tex
914 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
915 const struct tgsi_instruction_texture *tex)
916 {
917 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
918 struct etna_inst_tex rv = {
919 // XXX .amode (to allow for an array of samplers?)
920 .swiz = INST_SWIZ_IDENTITY
921 };
922
923 assert(native_reg.is_tex && native_reg.valid);
924 rv.id = native_reg.id;
925
926 return rv;
927 }
928
929 /* convert source operand */
930 static struct etna_inst_src
931 etna_create_src(const struct tgsi_full_src_register *tgsi,
932 const struct etna_native_reg *native)
933 {
934 const struct tgsi_src_register *reg = &tgsi->Register;
935 struct etna_inst_src rv = {
936 .use = 1,
937 .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
938 .neg = reg->Negate,
939 .abs = reg->Absolute,
940 .rgroup = native->rgroup,
941 .reg = native->id,
942 .amode = INST_AMODE_DIRECT,
943 };
944
945 assert(native->valid && !native->is_tex);
946
947 if (reg->Indirect)
948 rv.amode = etna_amode(tgsi->Indirect);
949
950 return rv;
951 }
952
953 static struct etna_inst_src
954 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
955 struct etna_native_reg temp)
956 {
957 struct etna_inst mov = { };
958
959 mov.opcode = INST_OPCODE_MOV;
960 mov.sat = 0;
961 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
962 INST_COMPS_Z | INST_COMPS_W);
963 mov.src[2] = src;
964 emit_inst(c, &mov);
965
966 src.swiz = INST_SWIZ_IDENTITY;
967 src.neg = src.abs = 0;
968 src.rgroup = temp.rgroup;
969 src.reg = temp.id;
970
971 return src;
972 }
973
974 static struct etna_inst_src
975 etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
976 {
977 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
978
979 return etna_mov_src_to_temp(c, src, temp);
980 }
981
982 static bool
983 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
984 {
985 return etna_rgroup_is_uniform(a.rgroup) &&
986 etna_rgroup_is_uniform(b.rgroup) &&
987 (a.rgroup != b.rgroup || a.reg != b.reg);
988 }
989
/* create a new label in the growable c->labels array */
static struct etna_compile_label *
alloc_new_label(struct etna_compile *c)
{
   struct etna_compile_label label = {
      .inst_idx = -1, /* start by pointing to no specific instruction */
   };

   /* array_insert token-pastes on its argument to reach c->labels_count /
    * c->labels_sz, so the argument must be spelled exactly 'c->labels' */
   array_insert(c->labels, label);

   /* NOTE(review): returns a pointer into a realloc()ed array; it is
    * invalidated if a later alloc_new_label() grows c->labels -- verify
    * callers do not hold label pointers across further inserts */
   return &c->labels[c->labels_count - 1];
}
1002
/* place label at current instruction pointer, i.e. resolve it to the next
 * instruction that will be emitted */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}
1009
/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's src2.imm
 * slot as soon as the value becomes known (after code generation, via
 * c->lbl_usage[]).
 */
static void
label_mark_use(struct etna_compile *c, struct etna_compile_label *label)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = label;
}
1021
1022 /* walk the frame stack and return first frame with matching type */
1023 static struct etna_compile_frame *
1024 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1025 {
1026 for (int sp = c->frame_sp; sp >= 0; sp--)
1027 if (c->frame_stack[sp].type == type)
1028 return &c->frame_stack[sp];
1029
1030 assert(0);
1031 return NULL;
1032 }
1033
/* Per-TGSI-opcode translation entry; drives dispatch in
 * etna_compile_pass_generate_code(). */
struct instr_translater {
   /* translation callback; receives the already-converted native sources */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* TGSI_OPCODE_* this entry handles */
   uint8_t opc;       /* native INST_OPCODE_* used by generic translators */

   /* tgsi src i -> native src slot src[i]; -1 marks an unused TGSI source
    * (asserted against in trans_instr) */
   int src[3];

   unsigned cond;     /* INST_CONDITION_* for conditional native opcodes */
};
1046
1047 static void
1048 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1049 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1050 {
1051 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1052 struct etna_inst instr = { };
1053
1054 instr.opcode = t->opc;
1055 instr.cond = t->cond;
1056 instr.sat = inst->Instruction.Saturate;
1057
1058 assert(info->num_dst <= 1);
1059 if (info->num_dst)
1060 instr.dst = convert_dst(c, &inst->Dst[0]);
1061
1062 assert(info->num_src <= ETNA_NUM_SRC);
1063
1064 for (unsigned i = 0; i < info->num_src; i++) {
1065 int swizzle = t->src[i];
1066
1067 assert(swizzle != -1);
1068 instr.src[swizzle] = src[i];
1069 }
1070
1071 emit_inst(c, &instr);
1072 }
1073
1074 static void
1075 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1076 const struct tgsi_full_instruction *inst,
1077 struct etna_inst_src *src)
1078 {
1079 emit_inst(c, &(struct etna_inst) {
1080 .opcode = INST_OPCODE_SELECT,
1081 .cond = t->cond,
1082 .sat = inst->Instruction.Saturate,
1083 .dst = convert_dst(c, &inst->Dst[0]),
1084 .src[0] = src[0],
1085 .src[1] = src[1],
1086 .src[2] = src[0],
1087 });
1088 }
1089
/* IF: push an IF frame and emit a conditional branch to the not-yet-placed
 * "else" label, taken when the condition source compares EQ to 0. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label; endif label is only created if an ELSE shows up */
   f->lbl_else = alloc_new_label(c);
   f->lbl_endif = NULL;

   /* We need to avoid the emit_inst() below becoming two instructions:
    * if the condition and the 0.0 immediate are conflicting uniforms, copy
    * the condition to a temp first. */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}
1120
/* ELSE: terminate the "then" block with an unconditional jump to a new
 * "endif" label, then place the "else" label here. */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and branch to endif label */
   f->lbl_endif = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, f->lbl_else);
}
1141
/* ENDIF: pop the IF frame and resolve its pending label at the current
 * position. */
static void
trans_endif(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* assign "endif" or "else" (if no ELSE) label to current position in
    * instruction stream, pop IF */
   if (f->lbl_endif != NULL)
      label_place(c, f->lbl_endif);
   else
      label_place(c, f->lbl_else);
}
1157
1158 static void
1159 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1160 const struct tgsi_full_instruction *inst,
1161 struct etna_inst_src *src)
1162 {
1163 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1164
1165 /* push LOOP to stack */
1166 f->type = ETNA_COMPILE_FRAME_LOOP;
1167 f->lbl_loop_bgn = alloc_new_label(c);
1168 f->lbl_loop_end = alloc_new_label(c);
1169
1170 label_place(c, f->lbl_loop_bgn);
1171
1172 c->num_loops++;
1173 }
1174
/* ENDLOOP: pop the LOOP frame, jump back to the loop's begin label and
 * place the end label (target of BRK) here. */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn);

   /* create unconditional branch to loop_bgn label.
    * NOTE(review): src[0] is zero-initialized here since TGSI ENDLOOP has no
    * sources — presumably harmless for an unconditional branch; confirm. */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });

   label_place(c, f->lbl_loop_end);
}
1198
/* BRK: unconditional jump to the end label of the innermost enclosing
 * loop. */
static void
trans_brk(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_end);

   /* create branch to loop_end label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1218
/* CONT: unconditional jump back to the begin label of the innermost
 * enclosing loop. */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1238
1239 static void
1240 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1241 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1242 {
1243 emit_inst(c, &(struct etna_inst) {
1244 .opcode = t->opc,
1245 .sat = inst->Instruction.Saturate,
1246 .dst = convert_dst(c, &inst->Dst[0]),
1247 .src[0] = src[0],
1248 .src[2] = src[0],
1249 });
1250 }
1251
/* ARL: load the address register with floor(src).
 * floor() is computed into a temp — natively when the core has FLOOR,
 * otherwise emulated as src - frc(src) — and then moved into the address
 * register with MOVAR. */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                                  INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      /* native FLOOR: temp = floor(src) */
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* emulate floor:
       *   FRC temp, src          (temp = fractional part)
       *   ADD temp, src, -temp   (temp = src - frc(src) = floor(src))
       */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      /* src2 = negated temp (the FRC result) */
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1;
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}
1300
1301 static void
1302 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1303 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1304 {
1305 /* dst = src0 * src1 + (1 - src0) * src2
1306 * => src0 * src1 - (src0 - 1) * src2
1307 * => src0 * src1 - (src0 * src2 - src2)
1308 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1309 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1310 */
1311 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1312 if (etna_src_uniforms_conflict(src[0], src[1]) ||
1313 etna_src_uniforms_conflict(src[0], src[2])) {
1314 src[0] = etna_mov_src(c, src[0]);
1315 }
1316
1317 struct etna_inst mad[2] = { };
1318 mad[0].opcode = INST_OPCODE_MAD;
1319 mad[0].sat = 0;
1320 mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1321 INST_COMPS_Z | INST_COMPS_W);
1322 mad[0].src[0] = src[0];
1323 mad[0].src[1] = src[2];
1324 mad[0].src[2] = negate(src[2]);
1325 mad[1].opcode = INST_OPCODE_MAD;
1326 mad[1].sat = inst->Instruction.Saturate;
1327 mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1328 mad[1].src[1] = src[1];
1329 mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1330
1331 emit_inst(c, &mad[0]);
1332 emit_inst(c, &mad[1]);
1333 }
1334
/* LIT: lighting coefficients. Emitted sequence:
 * SELECT.LT tmp._y__, 0, src.yyyy, 0
 * - can be eliminated if src.y is a uniform and >= 0
 * SELECT.GT tmp.___w, 128, src.wwww, 128
 * SELECT.LT tmp.___w, -128, tmp.wwww, -128
 * - can be eliminated if src.w is a uniform and fits clamp
 * LOG tmp.x, void, void, tmp.yyyy
 * MUL tmp.x, tmp.xxxx, tmp.wwww, void
 * LITP dst, undef, src.xxxx, tmp.xxxx
 */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* clamp src.y to >= 0; folded at compile time when src is a uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* clamp src.w (the specular exponent) to [-128, 128]; folded at compile
    * time when src is a uniform */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse the same instruction template for the lower clamp, flipping
       * condition and the sign of the 128 immediates */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   /* tmp.x = log2(src_y), then tmp.x *= src_w, so that the final LITP sees
    * pow(src.y, src.w) = exp2(src.w * log2(src.y)) */
   struct etna_inst ins[3] = { };
   ins[0].opcode = INST_OPCODE_LOG;
   ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
   ins[0].src[2] = src_y;

   emit_inst(c, &ins[0]);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1412
/* SSG: per-component sign, dst = -1, 0 or 1 depending on src. */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      /* native SIGN instruction */
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      /* emulate:
       *   SET.NZ    tmp, src          (tmp = src != 0 ? 1 : 0)
       *   SELECT.LZ dst, src, -tmp, tmp
       * (per the CMP mapping, SELECT.LZ picks src1 when src0 < 0,
       * src2 otherwise) */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                            INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}
1446
/* SIN/COS/SCS: three strategies depending on hardware capabilities:
 * new-style SIN/COS, old-style SIN/COS, or full software emulation. */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_sin_cos) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      /* TGSI lowering should deal with SCS */
      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      /* temp.z = src * (1/pi) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      /* temp.xy = sincos(temp.z) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* dst = temp.x * temp.y */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      /* TGSI lowering should deal with SCS */
      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       * MAD t.x_zw, src.xxxx, A, B
       * FRC t.x_z_, void, void, t.xwzw
       * MAD t.x_z_, t.xwzw, 2, -1
       * MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       * DP3 t.x_z_, t.zyww, C, void         (for sin)
       * DP3 t.__z_, t.zyww, C, void         (for scs)
       * MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       * DP3 t.x_z_, t.xyww, C, void         (for cos)
       * DP3 t.x___, t.xyww, C, void         (for scs)
       * MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
       * MAD dst, t.ywyw, .2225, t.xzxz
       *
       * TODO: we don't set dst.zw correctly for SCS.
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded. Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      /* MAD t.x_zw, src.xxxx, A, B */
      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      /* FRC t.x_z_ */
      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      /* MAD t.x_z_ = t * 2 - 1, mapping [0,1) to [-1,1) */
      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which temp components feed the
       * MUL/DP3 pair */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      /* SCS computes both: run the MUL/DP3 pair twice, targeting
       * different components */
      if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
         ins[5] = ins[3];
         ins[6] = ins[4];
         ins[4].dst.comps = INST_COMPS_X;
         ins[6].dst.comps = INST_COMPS_Z;
         ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
         ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
         ins[5].src[1] = absolute(ins[5].src[0]);
         p = &ins[7];
      } else {
         p = &ins[5];
      }

      /* MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz */
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      /* MAD dst, t.ywyw, .2225, t.xzxz — final precision refinement */
      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1612
1613 static void
1614 trans_dph(const struct instr_translater *t, struct etna_compile *c,
1615 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1616 {
1617 /*
1618 DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void
1619 ADD dst.xyzw, tmp.xyzw, void, src1.wwww
1620 */
1621 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1622 struct etna_inst ins[2] = { };
1623
1624 ins[0].opcode = INST_OPCODE_DP3;
1625 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1626 INST_COMPS_Z | INST_COMPS_W);
1627 ins[0].src[0] = src[0];
1628 ins[0].src[1] = src[1];
1629
1630 ins[1].opcode = INST_OPCODE_ADD;
1631 ins[1].sat = inst->Instruction.Saturate;
1632 ins[1].dst = convert_dst(c, &inst->Dst[0]);
1633 ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1634 ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
1635
1636 emit_inst(c, &ins[0]);
1637 emit_inst(c, &ins[1]);
1638 }
1639
/* TEX/TXB/TXL/TXP: texture sampling. Handles GL texture rectangle
 * coordinate rescaling and the projective divide for TXP. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* temp.x = coord.x * (1/width); the scale factors are patched-in
       * immediates resolved per texture unit */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      /* temp.y = coord.y * (1/height) */
      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      /* texture load with lod bias in coord.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      /* texture load with explicit lod */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}
1732
/* Translation stub for TGSI opcodes that produce no native code
 * (NOP, END). */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   (void)t;
   (void)c;
   (void)inst;
   (void)src;
}
1739
/* Dispatch table: one entry per handled TGSI opcode. For the generic
 * trans_instr entries, .opc is the native opcode, .src maps TGSI source i
 * to a native source slot (-1 = unused) and .cond selects a condition
 * code where the native opcode is conditional. */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_instr, .opc = INST_OPCODE_LOG, .src = {2, -1, -1}),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),
   INSTR(DPH, trans_dph),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),
   INSTR(SCS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};
1804
1805 /* Pass -- compile instructions */
1806 static void
1807 etna_compile_pass_generate_code(struct etna_compile *c)
1808 {
1809 struct tgsi_parse_context ctx = { };
1810 unsigned status = tgsi_parse_init(&ctx, c->tokens);
1811 assert(status == TGSI_PARSE_OK);
1812
1813 int inst_idx = 0;
1814 while (!tgsi_parse_end_of_tokens(&ctx)) {
1815 const struct tgsi_full_instruction *inst = 0;
1816
1817 /* No inner temps used yet for this instruction, clear counter */
1818 c->inner_temps = 0;
1819
1820 tgsi_parse_token(&ctx);
1821
1822 switch (ctx.FullToken.Token.Type) {
1823 case TGSI_TOKEN_TYPE_INSTRUCTION:
1824 /* iterate over operands */
1825 inst = &ctx.FullToken.FullInstruction;
1826 if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1827 inst_idx++;
1828 continue;
1829 }
1830
1831 /* Lookup the TGSI information and generate the source arguments */
1832 struct etna_inst_src src[ETNA_NUM_SRC];
1833 memset(src, 0, sizeof(src));
1834
1835 const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1836
1837 for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1838 const struct tgsi_full_src_register *reg = &inst->Src[i];
1839 const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1840
1841 if (!n->valid || n->is_tex)
1842 continue;
1843
1844 src[i] = etna_create_src(reg, n);
1845 }
1846
1847 const unsigned opc = inst->Instruction.Opcode;
1848 const struct instr_translater *t = &translaters[opc];
1849
1850 if (t->fxn) {
1851 t->fxn(t, c, inst, src);
1852
1853 inst_idx += 1;
1854 } else {
1855 BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1856 assert(0);
1857 }
1858 break;
1859 }
1860 }
1861 tgsi_parse_free(&ctx);
1862 }
1863
1864 /* Look up register by semantic */
1865 static struct etna_reg_desc *
1866 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1867 {
1868 for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1869 struct etna_reg_desc *reg = &c->file[file].reg[idx];
1870
1871 if (reg->semantic.Name == name && reg->semantic.Index == index)
1872 return reg;
1873 }
1874
1875 return NULL; /* not found */
1876 }
1877
1878 /** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1879 * - this is a vertex shader
1880 * - and this is an older GPU
1881 */
1882 static void
1883 etna_compile_add_z_div_if_needed(struct etna_compile *c)
1884 {
1885 if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1886 /* find position out */
1887 struct etna_reg_desc *pos_reg =
1888 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1889
1890 if (pos_reg != NULL) {
1891 /*
1892 * ADD tX.__z_, tX.zzzz, void, tX.wwww
1893 * MUL tX.__z_, tX.zzzz, 0.5, void
1894 */
1895 emit_inst(c, &(struct etna_inst) {
1896 .opcode = INST_OPCODE_ADD,
1897 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1898 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1899 .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1900 });
1901 emit_inst(c, &(struct etna_inst) {
1902 .opcode = INST_OPCODE_MUL,
1903 .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1904 .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1905 .src[1] = alloc_imm_f32(c, 0.5f),
1906 });
1907 }
1908 }
1909 }
1910
1911 /** add a NOP to the shader if
1912 * a) the shader is empty
1913 * or
1914 * b) there is a label at the end of the shader
1915 */
1916 static void
1917 etna_compile_add_nop_if_needed(struct etna_compile *c)
1918 {
1919 bool label_at_last_inst = false;
1920
1921 for (int idx = 0; idx < c->labels_count; ++idx) {
1922 if (c->labels[idx].inst_idx == c->inst_ptr)
1923 label_at_last_inst = true;
1924
1925 }
1926
1927 if (c->inst_ptr == 0 || label_at_last_inst)
1928 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1929 }
1930
1931 static void
1932 assign_uniforms(struct etna_compile_file *file, unsigned base)
1933 {
1934 for (int idx = 0; idx < file->reg_size; ++idx) {
1935 file->reg[idx].native.valid = 1;
1936 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1937 file->reg[idx].native.id = base + idx;
1938 }
1939 }
1940
1941 /* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
1942 * CONST must be consecutive as const buffers are supposed to be consecutive,
1943 * and before IMM, as this is
1944 * more convenient because is possible for the compilation process itself to
1945 * generate extra
1946 * immediates for constants such as pi, one, zero.
1947 */
static void
assign_constants_and_immediates(struct etna_compile *c)
{
   /* constants occupy uniform slots [0, reg_size) */
   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
   /* immediates start after the constants; imm_base counts scalar
    * components (4 per vec4 uniform register), hence the /4 when
    * converting back to a register id */
   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
         c->imm_size);
}
1958
1959 /* Assign declared samplers to native texture units */
1960 static void
1961 assign_texture_units(struct etna_compile *c)
1962 {
1963 uint tex_base = 0;
1964
1965 if (c->info.processor == PIPE_SHADER_VERTEX)
1966 tex_base = c->specs->vertex_sampler_offset;
1967
1968 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1969 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1970 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1971 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1972 }
1973 }
1974
1975 /* Additional pass to fill in branch targets. This pass should be last
1976 * as no instruction reordering or removing/addition can be done anymore
1977 * once the branch targets are computed.
1978 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   /* For every instruction slot that recorded a label reference, patch
    * the immediate field of the already-assembled instruction (4 words
    * per instruction, hence idx * 4) with the label's resolved
    * instruction index. */
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx])
         etna_assemble_set_imm(&c->code[idx * 4], c->lbl_usage[idx]->inst_idx);
   }
}
1987
1988 /* compare two etna_native_reg structures, return true if equal */
1989 static bool
1990 cmp_etna_native_reg(const struct etna_native_reg to,
1991 const struct etna_native_reg from)
1992 {
1993 return to.valid == from.valid && to.is_tex == from.is_tex &&
1994 to.rgroup == from.rgroup && to.id == from.id;
1995 }
1996
1997 /* go through all declarations and swap native registers *to* and *from* */
1998 static void
1999 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2000 const struct etna_native_reg from)
2001 {
2002 if (cmp_etna_native_reg(from, to))
2003 return; /* Nothing to do */
2004
2005 for (int idx = 0; idx < c->total_decls; ++idx) {
2006 if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2007 c->decl[idx].native = to;
2008 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2009 c->decl[idx].native = from;
2010 }
2011 }
2012 }
2013
2014 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2015 * Semantic POS is always t0. If that semantic is not used, avoid t0.
2016 */
2017 static void
2018 permute_ps_inputs(struct etna_compile *c)
2019 {
2020 /* Special inputs:
2021 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
2022 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
2023 */
2024 uint native_idx = 1;
2025
2026 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2027 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2028 uint input_id;
2029 assert(reg->has_semantic);
2030
2031 if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
2032 continue;
2033
2034 input_id = native_idx++;
2035 swap_native_registers(c, etna_native_temp(input_id),
2036 c->file[TGSI_FILE_INPUT].reg[idx].native);
2037 }
2038
2039 c->num_varyings = native_idx - 1;
2040
2041 if (native_idx > c->next_free_native)
2042 c->next_free_native = native_idx;
2043 }
2044
2045 /* fill in ps inputs into shader object */
2046 static void
2047 fill_in_ps_inputs(struct etna_shader *sobj, struct etna_compile *c)
2048 {
2049 struct etna_shader_io_file *sf = &sobj->infile;
2050
2051 sf->num_reg = 0;
2052
2053 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2054 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2055
2056 if (reg->native.id > 0) {
2057 assert(sf->num_reg < ETNA_NUM_INPUTS);
2058 sf->reg[sf->num_reg].reg = reg->native.id;
2059 sf->reg[sf->num_reg].semantic = reg->semantic;
2060 /* convert usage mask to number of components (*=wildcard)
2061 * .r (0..1) -> 1 component
2062 * .*g (2..3) -> 2 component
2063 * .**b (4..7) -> 3 components
2064 * .***a (8..15) -> 4 components
2065 */
2066 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2067 sf->num_reg++;
2068 }
2069 }
2070
2071 assert(sf->num_reg == c->num_varyings);
2072 sobj->input_count_unk8 = 31; /* XXX what is this */
2073 }
2074
2075 /* fill in output mapping for ps into shader object */
2076 static void
2077 fill_in_ps_outputs(struct etna_shader *sobj, struct etna_compile *c)
2078 {
2079 sobj->outfile.num_reg = 0;
2080
2081 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2082 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2083
2084 switch (reg->semantic.Name) {
2085 case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2086 sobj->ps_color_out_reg = reg->native.id;
2087 break;
2088 case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2089 sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2090 break;
2091 default:
2092 assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2093 }
2094 }
2095 }
2096
2097 /* fill in inputs for vs into shader object */
2098 static void
2099 fill_in_vs_inputs(struct etna_shader *sobj, struct etna_compile *c)
2100 {
2101 struct etna_shader_io_file *sf = &sobj->infile;
2102
2103 sf->num_reg = 0;
2104 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2105 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2106 assert(sf->num_reg < ETNA_NUM_INPUTS);
2107 /* XXX exclude inputs with special semantics such as gl_frontFacing */
2108 sf->reg[sf->num_reg].reg = reg->native.id;
2109 sf->reg[sf->num_reg].semantic = reg->semantic;
2110 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2111 sf->num_reg++;
2112 }
2113
2114 sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2115 }
2116
2117 /* build two-level output index [Semantic][Index] for fast linking */
static void
build_output_index(struct etna_shader *sobj)
{
   int total = 0;
   int offset = 0;

   /* total number of outputs across all semantic names */
   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
      total += sobj->output_count_per_semantic[name];

   /* one flat pointer array, partitioned per semantic name below;
    * freed in etna_destroy_shader */
   sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));

   /* carve the flat list into per-semantic sub-arrays */
   for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
      sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
      offset += sobj->output_count_per_semantic[name];
   }

   /* point each [Name][Index] slot at its outfile entry */
   for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
      sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
                               [sobj->outfile.reg[idx].semantic.Index] =
         &sobj->outfile.reg[idx];
   }
}
2140
2141 /* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      /* POSITION and PSIZE are routed directly via dedicated registers,
       * not through the varying table */
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         /* track highest Index per semantic; used by build_output_index */
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}
2200
/* Verify the compiled shader fits within the hardware limits for this GPU.
 * Returns false (compile failure) when a hard limit is exceeded.
 */
static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   if (c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   /* NOTE(review): unlike the checks above, exceeding num_constants only
    * warns and does not fail the compile -- confirm whether this is
    * intentional or a missing `return false`. */
   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}
2241
/* Copy the compiler's uniform state (constants + generated immediates)
 * into the shader object. The imm_data/imm_contents copies are owned by
 * the shader and freed in etna_destroy_shader.
 */
static void
copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader *sobj)
{
   uint32_t count = c->imm_size;
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   /* imm_base is the scalar offset where immediates start, i.e. the
    * number of constant scalar components */
   uinfo->const_count = c->imm_base;
   uinfo->imm_count = count;
   uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
   uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));

   etna_set_shader_uniforms_dirty_flags(sobj);
}
2255
/* Compile a TGSI shader (vertex or fragment) to Vivante machine code plus
 * the metadata needed to bind it (see the file header comment).
 *
 * specs:  GPU-specific capabilities and limits.
 * tokens: TGSI program to compile; not modified or retained.
 *
 * Returns a newly allocated struct etna_shader (free with
 * etna_destroy_shader), or NULL when allocation fails or the shader
 * exceeds hardware limits.
 */
struct etna_shader *
etna_compile_shader(const struct etna_specs *specs,
                    const struct tgsi_token *tokens)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;
   struct etna_shader *shader;

   /* lower TGSI opcodes the hardware has no direct instruction for */
   struct tgsi_lowering_config lconfig = {
      .lower_SCS = specs->has_sin_cos_sqrt,
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = true,
      .lower_DP2A = true,
      .lower_TRUNC = true,
      .lower_XPD = true
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return NULL;

   shader = CALLOC_STRUCT(etna_shader);
   if (!shader)
      goto out;

   c->specs = specs;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   /* lowering returns NULL when nothing needed lowering; only free the
    * token list later if it was freshly allocated */
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    * this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /* potential optimization case:
    *   if single MOV TEMP[y], IN[x] before which temp y is not used, and
    * after which IN[x]
    *   is not read, temp[y] can be used as input register as-is
    */
   /* sort temporaries by first use
    * sort inputs by last usage
    * iterate over inputs, temporaries
    *   if last usage of input <= first usage of temp:
    *     assign input to temp
    *     advance input, temporary pointer
    *   else
    *     advance temporary pointer
    *
    * potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input;
    * however, as the temp is not used before this, how would this make
    * sense? uninitialized temporaries have an undefined
    * value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /* potential optimization case:
    *   if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in
    *   the shader) after which temp y is no longer used temp[y] can be
    * used as output register as-is
    *
    * potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output;
    * however, as the temp is not used after this, how would this make
    * sense? could just discard the output value
    */
   /* sort temporaries by last use
    * sort outputs by first usage
    * iterate over outputs, temporaries
    *   if first usage of output >= last usage of temp:
    *     assign output to temp
    *     advance output, temporary pointer
    *   else
    *     advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations (before PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed
    * from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations (again, after PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_add_nop_if_needed(c);
   /* must run last -- instructions may no longer move after this */
   etna_compile_fill_in_labels(c);

   ret = etna_compile_check_limits(c);
   if (!ret) {
      FREE(shader);
      shader = NULL;
      goto out;
   }

   /* fill in output structure */
   shader->processor = c->info.processor;
   /* each instruction is 4 words of 4 bytes: size in words, copy in bytes */
   shader->code_size = c->inst_ptr * 4;
   shader->code = mem_dup(c->code, c->inst_ptr * 16);
   shader->num_loops = c->num_loops;
   shader->num_temps = c->next_free_native;
   /* -1 marks "not present"; filled in by the fill_in_* helpers below */
   shader->vs_pos_out_reg = -1;
   shader->vs_pointsize_out_reg = -1;
   shader->ps_color_out_reg = -1;
   shader->ps_depth_out_reg = -1;
   copy_uniform_state_to_shader(c, shader);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(shader, c);
      fill_in_vs_outputs(shader, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(shader, c);
      fill_in_ps_outputs(shader, c);
   }

out:
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return shader;
}
2450
2451 extern const char *tgsi_swizzle_names[];
2452 void
2453 etna_dump_shader(const struct etna_shader *shader)
2454 {
2455 if (shader->processor == PIPE_SHADER_VERTEX)
2456 printf("VERT\n");
2457 else
2458 printf("FRAG\n");
2459
2460
2461 etna_disasm(shader->code, shader->code_size, PRINT_RAW);
2462
2463 printf("num loops: %i\n", shader->num_loops);
2464 printf("num temps: %i\n", shader->num_temps);
2465 printf("num const: %i\n", shader->uniforms.const_count);
2466 printf("immediates:\n");
2467 for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
2468 printf(" [%i].%s = %f (0x%08x)\n",
2469 (idx + shader->uniforms.const_count) / 4,
2470 tgsi_swizzle_names[idx % 4],
2471 *((float *)&shader->uniforms.imm_data[idx]),
2472 shader->uniforms.imm_data[idx]);
2473 }
2474 printf("inputs:\n");
2475 for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
2476 printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
2477 tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
2478 shader->infile.reg[idx].semantic.Index,
2479 shader->infile.reg[idx].num_components);
2480 }
2481 printf("outputs:\n");
2482 for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
2483 printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
2484 tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
2485 shader->outfile.reg[idx].semantic.Index,
2486 shader->outfile.reg[idx].num_components);
2487 }
2488 printf("special:\n");
2489 if (shader->processor == PIPE_SHADER_VERTEX) {
2490 printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
2491 printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
2492 printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
2493 } else {
2494 printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
2495 printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
2496 }
2497 printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
2498 }
2499
2500 void
2501 etna_destroy_shader(struct etna_shader *shader)
2502 {
2503 assert(shader);
2504
2505 FREE(shader->code);
2506 FREE(shader->uniforms.imm_data);
2507 FREE(shader->uniforms.imm_contents);
2508 FREE(shader->output_per_semantic_list);
2509 FREE(shader);
2510 }
2511
2512 static const struct etna_shader_inout *
2513 etna_shader_vs_lookup(const struct etna_shader *sobj,
2514 const struct etna_shader_inout *in)
2515 {
2516 if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2517 return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2518
2519 return NULL;
2520 }
2521
/* Resolve the routing between VS outputs and FS inputs ("varyings").
 *
 * info: receives, per FS input register, the matching VS output register
 *       and per-component usage flags (assumes caller zero-initialized
 *       info -- TODO confirm; num_varyings is only grown here).
 * Returns true on link error (an FS input without a matching VS output),
 * false on success.
 */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader *vs, const struct etna_shader *fs)
{
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;

      /* FS inputs occupy t1..tN (t0 is position), hence the -1 below */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      /* point coordinates are generated by the rasterizer, not routed
       * from a VS output */
      if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
         varying->use[2] = VARYING_COMPONENT_USE_USED;
         varying->use[3] = VARYING_COMPONENT_USE_USED;
         varying->reg = 0; /* replaced by point coord -- doesn't matter */
         continue;
      }

      if (vsio == NULL)
         return true; /* not found -- link error */

      varying->use[0] = VARYING_COMPONENT_USE_USED;
      varying->use[1] = VARYING_COMPONENT_USE_USED;
      varying->use[2] = VARYING_COMPONENT_USE_USED;
      varying->use[3] = VARYING_COMPONENT_USE_USED;
      varying->reg = vsio->reg;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}