etnaviv: handle PIPE_CAP_TGSI_FS_FBFETCH
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_compiler.c
1 /*
2 * Copyright (c) 2012-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27 /* TGSI->Vivante shader ISA conversion */
28
29 /* What does the compiler return (see etna_shader_object)?
30 * 1) instruction data
31 * 2) input-to-temporary mapping (fixed for ps)
32 * *) in case of ps, semantic -> varying id mapping
33 * *) for each varying: number of components used (r, rg, rgb, rgba)
34 * 3) temporary-to-output mapping (in case of vs, fixed for ps)
35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 * 5) immediates base offset, immediates data
37 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 * configure the hw, but useful for error checking
39 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 * (output reg id is enough)
41 *
42 * Empty shaders are not allowed, should always at least generate a NOP. Also
43 * if there is a label at the end of the shader, an extra NOP should be
44 * generated as jump target.
45 *
46 * TODO
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
49 */
50
51 #include "etnaviv_compiler.h"
52
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
59
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73
/* Maximum number of scratch temporaries that may be requested while
 * translating a single TGSI instruction (see etna_compile_get_inner_temp). */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant tables used when lowering SIN/COS; uploaded as vec4 immediates.
 * Row 0 holds polynomial coefficients, row 1 holds 1/(2*pi) and related
 * range-reduction constants. NOTE(review): exact use is in the trig
 * translation code outside this view — confirm against the SIN/COS handler. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};
84
/* Native register description structure */
struct etna_native_reg {
   unsigned valid : 1;  /* 1 once a native register has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group (INST_RGROUP_*) */
   unsigned id : 9;     /* register index within the group */
};
92
/* Register description: per-register bookkeeping for one TGSI register,
 * tracking liveness and the native (hardware) register it maps to. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx;                  /* index into file */
   bool active;              /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use;  /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4;       /* usage, per channel */
   bool has_semantic;             /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp;     /* Interpolation type */
};
107
/* Label information structure */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to; -1 while unplaced */
};
112
/* Kind of control-flow construct a stack frame represents. */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP,
};
117
/* nesting scope frame (LOOP, IF, ...) during compilation
 * Only the labels relevant to the frame's type are used:
 * IF frames use lbl_else/lbl_endif, LOOP frames use lbl_loop_bgn/lbl_loop_end.
 */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   struct etna_compile_label *lbl_else;
   struct etna_compile_label *lbl_endif;
   struct etna_compile_label *lbl_loop_bgn;
   struct etna_compile_label *lbl_loop_end;
};
127
/* Per-TGSI-file register table: a slice of etna_compile::decl. */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};
134
/* Append `val` to the dynamic array `arr`, growing it (doubling, minimum 16
 * entries) when full. Relies on companion variables arr##_count and arr##_sz.
 * NOTE(review): the realloc result is not checked; on OOM this dereferences
 * NULL rather than failing gracefully — confirm whether that is acceptable
 * project policy before use in new code. */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)

143
144
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* whether we own `tokens` and must free them */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations; file[] entries point into
    * this flat array (see etna_allocate_decls) */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   /* per-instruction label reference, resolved once label targets are known */
   struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS];

   /* dynamic array of labels, managed via array_insert() */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;
};
198
199 static struct etna_reg_desc *
200 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
201 {
202 return &c->file[dst.File].reg[dst.Index];
203 }
204
205 static struct etna_reg_desc *
206 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
207 {
208 return &c->file[src.File].reg[src.Index];
209 }
210
211 static struct etna_native_reg
212 etna_native_temp(unsigned reg)
213 {
214 return (struct etna_native_reg) {
215 .valid = 1,
216 .rgroup = INST_RGROUP_TEMP,
217 .id = reg
218 };
219 }
220
/** Register allocation **/
/* Sort criteria for building a register index (see sort_registers). */
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};

/* Augmented register description for sorting: pairs a register with the
 * precomputed sort key (a first/last-use instruction index, possibly
 * negated for descending order). */
struct sort_rec {
   struct etna_reg_desc *ptr;
   int key;
};
234
235 static int
236 sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
237 {
238 if (a->key < b->key)
239 return -1;
240
241 if (a->key > b->key)
242 return 1;
243
244 return 0;
245 }
246
/* create an index on a register set based on certain criteria.
 * Fills `sorted` with the active registers of `file`, keyed by first or
 * last use (negated keys give descending order), then sorts it.
 * Returns the number of entries written (i.e. number of active registers).
 */
static int
sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
               enum reg_sort_order so)
{
   struct etna_reg_desc *regs = file->reg;
   int ptr = 0;

   /* pre-populate keys from active registers */
   for (int idx = 0; idx < file->reg_size; ++idx) {
      /* only interested in active registers now; will only assign inactive ones
       * if no space in active ones */
      if (regs[idx].active) {
         sorted[ptr].ptr = &regs[idx];

         switch (so) {
         case FIRST_USE_ASC:
            sorted[ptr].key = regs[idx].first_use;
            break;
         case LAST_USE_ASC:
            sorted[ptr].key = regs[idx].last_use;
            break;
         case FIRST_USE_DESC:
            sorted[ptr].key = -regs[idx].first_use;
            break;
         case LAST_USE_DESC:
            sorted[ptr].key = -regs[idx].last_use;
            break;
         }
         ptr++;
      }
   }

   /* sort index by key */
   qsort(sorted, ptr, sizeof(struct sort_rec),
         (int (*)(const void *, const void *))sort_rec_compar);

   return ptr;
}
286
287 /* Allocate a new, unused, native temp register */
288 static struct etna_native_reg
289 alloc_new_native_reg(struct etna_compile *c)
290 {
291 assert(c->next_free_native < ETNA_MAX_TEMPS);
292 return etna_native_temp(c->next_free_native++);
293 }
294
295 /* assign TEMPs to native registers */
296 static void
297 assign_temporaries_to_native(struct etna_compile *c,
298 struct etna_compile_file *file)
299 {
300 struct etna_reg_desc *temps = file->reg;
301
302 for (int idx = 0; idx < file->reg_size; ++idx)
303 temps[idx].native = alloc_new_native_reg(c);
304 }
305
/* assign inputs and outputs to temporaries
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible.
 *
 * Two sorted lists are walked in lockstep: inputs sorted by last use against
 * temps sorted by first use (an input can share a temp whose life starts
 * after the input dies), or symmetrically for outputs. Anything that cannot
 * be paired gets a fresh native register in the fallback loop. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}
353
/* Allocate an immediate with a certain value and return the index. If
 * there is already an immediate with that value, return that.
 * Search order: exact match first, then a recycled ETNA_IMMEDIATE_UNUSED
 * slot, then a brand-new slot at the end.
 */
static struct etna_inst_src
alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
          uint32_t value)
{
   int idx;

   /* Could use a hash table to speed this up */
   for (idx = 0; idx < c->imm_size; ++idx) {
      if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
         break;
   }

   /* look if there is an unused slot */
   if (idx == c->imm_size) {
      for (idx = 0; idx < c->imm_size; ++idx) {
         if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
            break;
      }
   }

   /* allocate new immediate */
   if (idx == c->imm_size) {
      assert(c->imm_size < ETNA_MAX_IMM);
      idx = c->imm_size++;
      c->imm_data[idx] = value;
      c->imm_contents[idx] = contents;
   }

   /* swizzle so that component with value is returned in all components */
   idx += c->imm_base;
   struct etna_inst_src imm_src = {
      .use = 1,
      .rgroup = INST_RGROUP_UNIFORM_0,
      .reg = idx / 4, /* four 32-bit components per uniform register */
      .swiz = INST_SWIZ_BROADCAST(idx & 3)
   };

   return imm_src;
}
396
/* Allocate (or find) a plain 32-bit constant immediate. */
static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
}
402
/* Allocate (or find) a full vec4 of immediate values occupying one aligned
 * uniform register; returns a source with identity swizzle.
 */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* search existing immediates for a matching aligned vec4 */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* no match found: append a new, 4-aligned group of immediates */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}
440
441 static uint32_t
442 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
443 unsigned swiz_idx)
444 {
445 assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
446 unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
447
448 return c->imm_data[idx];
449 }
450
/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 * The float is stored bit-for-bit via fui().
 */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}
459
460 static struct etna_inst_src
461 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
462 {
463 uint32_t val[4];
464
465 for (int i = 0; i < 4; i++)
466 val[i] = fui(vec4[i]);
467
468 return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
469 }
470
/* Pass -- check register file declarations and immediates
 * Walks the token stream once and copies every TGSI immediate (always four
 * components) into the compile context's immediate pool.
 */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}
503
504 /* Allocate register declarations for the registers in all register files */
505 static void
506 etna_allocate_decls(struct etna_compile *c)
507 {
508 uint idx = 0;
509
510 for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
511 c->file[x].reg = &c->decl[idx];
512 c->file[x].reg_size = c->info.file_max[x] + 1;
513
514 for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
515 c->decl[idx].file = x;
516 c->decl[idx].idx = sub;
517 idx++;
518 }
519 }
520
521 c->total_decls = idx;
522 }
523
/* Pass -- check and record usage of temporaries, inputs, outputs
 * Records, for every register, the instruction index of its first and last
 * use and the per-channel usage mask. These live ranges later drive the
 * input/output-to-temporary sharing in assign_inouts_to_temporaries().
 */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* For every register, mark the first and last instruction index where
       * it is used; this allows finding ranges where a temporary can be
       * borrowed as input and/or output register.
       *
       * XXX loops need special care (or this optimization disabled): the
       * last usage of a register inside a loop means it can still be read on
       * the next iteration, so it is only really free after the loop ends;
       * symmetrically, the first usage inside a loop does not mean the
       * register was unwritten before it, so an input is only free before
       * the loop starts. Execution is no longer chronological there. The
       * proper fix would be dominator/post-dominator analysis (especially
       * with direct branch instructions), but not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register, this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}
614
615 /* assign inputs that need to be assigned to specific registers */
616 static void
617 assign_special_inputs(struct etna_compile *c)
618 {
619 if (c->info.processor == PIPE_SHADER_FRAGMENT) {
620 /* never assign t0 as it is the position output, start assigning at t1 */
621 c->next_free_native = 1;
622
623 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
624 for (int idx = 0; idx < c->total_decls; ++idx) {
625 struct etna_reg_desc *reg = &c->decl[idx];
626
627 if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
628 reg->native = etna_native_temp(0);
629 }
630 }
631 }
632
633 /* Check that a move instruction does not swizzle any of the components
634 * that it writes.
635 */
636 static bool
637 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
638 const struct tgsi_src_register src)
639 {
640 return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
641 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
642 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
643 (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
644 }
645
/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 * MOV OUT[1], TEMP[2]
 * MOV OUT[0], TEMP[0]
 * MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough)
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               /* alias the output onto the temp's native register */
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and neither the input nor the output has a native register
             * assigned yet
             * and the output is only used in this instruction:
             * allocate a new register, and associate both input and output
             * with it
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}
734
735 /* Get a temporary to be used within one TGSI instruction.
736 * The first time that this function is called the temporary will be allocated.
737 * Each call to this function will return the same temporary.
738 */
739 static struct etna_native_reg
740 etna_compile_get_inner_temp(struct etna_compile *c)
741 {
742 int inner_temp = c->inner_temps;
743
744 if (inner_temp < ETNA_MAX_INNER_TEMPS) {
745 if (!c->inner_temp[inner_temp].valid)
746 c->inner_temp[inner_temp] = alloc_new_native_reg(c);
747
748 /* alloc_new_native_reg() handles lack of registers */
749 c->inner_temps += 1;
750 } else {
751 BUG("Too many inner temporaries (%i) requested in one instruction",
752 inner_temp + 1);
753 }
754
755 return c->inner_temp[inner_temp];
756 }
757
758 static struct etna_inst_dst
759 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
760 {
761 /* Can only assign to temporaries */
762 assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
763
764 struct etna_inst_dst rv = {
765 .comps = comps,
766 .use = 1,
767 .reg = native.id,
768 };
769
770 return rv;
771 }
772
773 static struct etna_inst_src
774 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
775 {
776 assert(native.valid && !native.is_tex);
777
778 struct etna_inst_src rv = {
779 .use = 1,
780 .swiz = swizzle,
781 .rgroup = native.rgroup,
782 .reg = native.id,
783 .amode = INST_AMODE_DIRECT,
784 };
785
786 return rv;
787 }
788
789 static inline struct etna_inst_src
790 negate(struct etna_inst_src src)
791 {
792 src.neg = !src.neg;
793
794 return src;
795 }
796
797 static inline struct etna_inst_src
798 absolute(struct etna_inst_src src)
799 {
800 src.abs = 1;
801
802 return src;
803 }
804
805 static inline struct etna_inst_src
806 swizzle(struct etna_inst_src src, unsigned swizzle)
807 {
808 src.swiz = inst_swiz_compose(src.swiz, swizzle);
809
810 return src;
811 }
812
813 /* Emit instruction and append it to program */
814 static void
815 emit_inst(struct etna_compile *c, struct etna_inst *inst)
816 {
817 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
818
819 /* Check for uniform conflicts (each instruction can only access one
820 * uniform),
821 * if detected, use an intermediate temporary */
822 unsigned uni_rgroup = -1;
823 unsigned uni_reg = -1;
824
825 for (int src = 0; src < ETNA_NUM_SRC; ++src) {
826 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
827 if (uni_reg == -1) { /* first unique uniform used */
828 uni_rgroup = inst->src[src].rgroup;
829 uni_reg = inst->src[src].reg;
830 } else { /* second or later; check that it is a re-use */
831 if (uni_rgroup != inst->src[src].rgroup ||
832 uni_reg != inst->src[src].reg) {
833 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
834 "accesses different uniforms, "
835 "need to generate extra MOV");
836 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
837
838 /* Generate move instruction to temporary */
839 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
840 .opcode = INST_OPCODE_MOV,
841 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
842 INST_COMPS_Z | INST_COMPS_W),
843 .src[2] = inst->src[src]
844 });
845
846 c->inst_ptr++;
847
848 /* Modify instruction to use temp register instead of uniform */
849 inst->src[src].use = 1;
850 inst->src[src].rgroup = INST_RGROUP_TEMP;
851 inst->src[src].reg = inner_temp.id;
852 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
853 inst->src[src].neg = 0; /* negation happens on MOV */
854 inst->src[src].abs = 0; /* abs happens on MOV */
855 inst->src[src].amode = 0; /* amode effects happen on MOV */
856 }
857 }
858 }
859 }
860
861 /* Finally assemble the actual instruction */
862 etna_assemble(&c->code[c->inst_ptr * 4], inst);
863 c->inst_ptr++;
864 }
865
866 static unsigned int
867 etna_amode(struct tgsi_ind_register indirect)
868 {
869 assert(indirect.File == TGSI_FILE_ADDRESS);
870 assert(indirect.Index == 0);
871
872 switch (indirect.Swizzle) {
873 case TGSI_SWIZZLE_X:
874 return INST_AMODE_ADD_A_X;
875 case TGSI_SWIZZLE_Y:
876 return INST_AMODE_ADD_A_Y;
877 case TGSI_SWIZZLE_Z:
878 return INST_AMODE_ADD_A_Z;
879 case TGSI_SWIZZLE_W:
880 return INST_AMODE_ADD_A_W;
881 default:
882 assert(!"Invalid swizzle");
883 }
884 }
885
/* convert destination operand
 * The address register (ADDR[0]) is special-cased: it is not a general
 * temporary, so use=0 and the raw index is kept. All other files go through
 * the native-register mapping. Indirect writes get their amode filled in.
 */
static struct etna_inst_dst
convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
{
   struct etna_inst_dst rv = {
      /// XXX .amode
      .comps = in->Register.WriteMask,
   };

   if (in->Register.File == TGSI_FILE_ADDRESS) {
      assert(in->Register.Index == 0);
      rv.reg = in->Register.Index;
      rv.use = 0;
   } else {
      rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
                              in->Register.WriteMask);
   }

   if (in->Register.Indirect)
      rv.amode = etna_amode(in->Indirect);

   return rv;
}
909
910 /* convert texture operand */
911 static struct etna_inst_tex
912 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
913 const struct tgsi_instruction_texture *tex)
914 {
915 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
916 struct etna_inst_tex rv = {
917 // XXX .amode (to allow for an array of samplers?)
918 .swiz = INST_SWIZ_IDENTITY
919 };
920
921 assert(native_reg.is_tex && native_reg.valid);
922 rv.id = native_reg.id;
923
924 return rv;
925 }
926
/* convert source operand
 * Combines the TGSI per-component swizzle and negate/absolute modifiers with
 * the previously assigned native register; indirect reads get their amode
 * filled in from the TGSI indirect register.
 */
static struct etna_inst_src
etna_create_src(const struct tgsi_full_src_register *tgsi,
                const struct etna_native_reg *native)
{
   const struct tgsi_src_register *reg = &tgsi->Register;
   struct etna_inst_src rv = {
      .use = 1,
      .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
      .neg = reg->Negate,
      .abs = reg->Absolute,
      .rgroup = native->rgroup,
      .reg = native->id,
      .amode = INST_AMODE_DIRECT,
   };

   assert(native->valid && !native->is_tex);

   if (reg->Indirect)
      rv.amode = etna_amode(tgsi->Indirect);

   return rv;
}
950
951 static struct etna_inst_src
952 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
953 struct etna_native_reg temp)
954 {
955 struct etna_inst mov = { };
956
957 mov.opcode = INST_OPCODE_MOV;
958 mov.sat = 0;
959 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
960 INST_COMPS_Z | INST_COMPS_W);
961 mov.src[2] = src;
962 emit_inst(c, &mov);
963
964 src.swiz = INST_SWIZ_IDENTITY;
965 src.neg = src.abs = 0;
966 src.rgroup = temp.rgroup;
967 src.reg = temp.id;
968
969 return src;
970 }
971
/* Copy `src` into a freshly requested inner temporary and return a source
 * operand reading that temporary. */
static struct etna_inst_src
etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);

   return etna_mov_src_to_temp(c, src, temp);
}
979
980 static bool
981 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
982 {
983 return etna_rgroup_is_uniform(a.rgroup) &&
984 etna_rgroup_is_uniform(b.rgroup) &&
985 (a.rgroup != b.rgroup || a.reg != b.reg);
986 }
987
/* create a new label
 * The returned pointer stays valid only until the next array_insert() on
 * c->labels (realloc may move the array); callers use it immediately. */
static struct etna_compile_label *
alloc_new_label(struct etna_compile *c)
{
   struct etna_compile_label label = {
      .inst_idx = -1, /* start by point to no specific instruction */
   };

   array_insert(c->labels, label);

   return &c->labels[c->labels_count - 1];
}
1000
/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}
1007
/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's src2.imm
 * slot as soon as the value becomes known.
 */
static void
label_mark_use(struct etna_compile *c, struct etna_compile_label *label)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = label;
}
1019
1020 /* walk the frame stack and return first frame with matching type */
1021 static struct etna_compile_frame *
1022 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1023 {
1024 for (unsigned sp = c->frame_sp; sp >= 0; sp--)
1025 if (c->frame_stack[sp].type == type)
1026 return &c->frame_stack[sp];
1027
1028 assert(0);
1029 return NULL;
1030 }
1031
/* One per-TGSI-opcode entry of the translation dispatch table. */
struct instr_translater {
   /* handler that emits the hardware instruction(s) for this opcode */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* TGSI_OPCODE_* this entry translates */
   uint8_t opc;       /* hardware INST_OPCODE_* used by generic handlers */

   /* tgsi src operand index -> etna src slot; -1 marks an unused operand
    * (trans_instr asserts on -1) */
   int src[3];

   unsigned cond; /* INST_CONDITION_* for conditional ops (SET/SELECT/...) */
};
1044
1045 static void
1046 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1047 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1048 {
1049 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1050 struct etna_inst instr = { };
1051
1052 instr.opcode = t->opc;
1053 instr.cond = t->cond;
1054 instr.sat = inst->Instruction.Saturate;
1055
1056 assert(info->num_dst <= 1);
1057 if (info->num_dst)
1058 instr.dst = convert_dst(c, &inst->Dst[0]);
1059
1060 assert(info->num_src <= ETNA_NUM_SRC);
1061
1062 for (unsigned i = 0; i < info->num_src; i++) {
1063 int swizzle = t->src[i];
1064
1065 assert(swizzle != -1);
1066 instr.src[swizzle] = src[i];
1067 }
1068
1069 emit_inst(c, &instr);
1070 }
1071
1072 static void
1073 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1074 const struct tgsi_full_instruction *inst,
1075 struct etna_inst_src *src)
1076 {
1077 emit_inst(c, &(struct etna_inst) {
1078 .opcode = INST_OPCODE_SELECT,
1079 .cond = t->cond,
1080 .sat = inst->Instruction.Saturate,
1081 .dst = convert_dst(c, &inst->Dst[0]),
1082 .src[0] = src[0],
1083 .src[1] = src[1],
1084 .src[2] = src[0],
1085 });
1086 }
1087
/* IF: push an IF frame, then emit a branch to the (not yet placed)
 * "else" label, taken when src0 == 0. The branch target is patched in
 * by etna_compile_fill_in_labels once the label is placed. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else = alloc_new_label(c);
   f->lbl_endif = NULL; /* only set if an ELSE token shows up */

   /* We need to avoid the emit_inst() below becoming two instructions:
    * src0 and the zero immediate may not read different uniform registers */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in the final pass; must happen right before the emit_inst */
   label_mark_use(c, f->lbl_else);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}
1118
/* ELSE: emit an unconditional jump over the else-block to a new "endif"
 * label, then place the "else" label here so the IF's conditional branch
 * lands at the start of the else-block. */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and branch to endif label */
   f->lbl_endif = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, f->lbl_else);
}
1139
1140 static void
1141 trans_endif(const struct instr_translater *t, struct etna_compile *c,
1142 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1143 {
1144 assert(c->frame_sp > 0);
1145 struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1146 assert(f->type == ETNA_COMPILE_FRAME_IF);
1147
1148 /* assign "endif" or "else" (if no ELSE) label to current position in
1149 * instruction stream, pop IF */
1150 if (f->lbl_endif != NULL)
1151 label_place(c, f->lbl_endif);
1152 else
1153 label_place(c, f->lbl_else);
1154 }
1155
1156 static void
1157 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1158 const struct tgsi_full_instruction *inst,
1159 struct etna_inst_src *src)
1160 {
1161 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1162
1163 /* push LOOP to stack */
1164 f->type = ETNA_COMPILE_FRAME_LOOP;
1165 f->lbl_loop_bgn = alloc_new_label(c);
1166 f->lbl_loop_end = alloc_new_label(c);
1167
1168 label_place(c, f->lbl_loop_bgn);
1169 }
1170
/* ENDLOOP: pop the LOOP frame, emit an unconditional back-branch to the
 * loop begin label, then place the loop end label here so BRK branches
 * exit past the back-branch. */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in the next pass */
   label_mark_use(c, f->lbl_loop_bgn);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });

   label_place(c, f->lbl_loop_end);
}
1194
1195 static void
1196 trans_brk(const struct instr_translater *t, struct etna_compile *c,
1197 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1198 {
1199 assert(c->frame_sp > 0);
1200 struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1201
1202 /* mark position in instruction stream of label reference so that it can be
1203 * filled in in next pass */
1204 label_mark_use(c, f->lbl_loop_end);
1205
1206 /* create branch to loop_end label */
1207 emit_inst(c, &(struct etna_inst) {
1208 .opcode = INST_OPCODE_BRANCH,
1209 .cond = INST_CONDITION_TRUE,
1210 .src[0] = src[0],
1211 /* imm is filled in later */
1212 });
1213 }
1214
/* CONT: unconditional branch back to the begin label of the innermost
 * loop. */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn);

   /* create branch to loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1234
1235 static void
1236 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1237 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1238 {
1239 emit_inst(c, &(struct etna_inst) {
1240 .opcode = t->opc,
1241 .sat = inst->Instruction.Saturate,
1242 .dst = convert_dst(c, &inst->Dst[0]),
1243 .src[0] = src[0],
1244 .src[2] = src[0],
1245 });
1246 }
1247
/* ARL: load the address register with floor(src).
 * With native FLOOR support a single FLOOR into a temporary suffices;
 * otherwise floor(x) is composed as x - frc(x) using FRC + ADD.
 * A final MOVAR transfers the temporary into the address register. */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                                  INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      struct etna_inst floor = { };

      /* FLOOR temp, src */
      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      struct etna_inst floor[2] = { };

      /* FRC temp, src -- temp = fractional part of src */
      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      /* ADD temp, src, -temp -- temp = src - frc(src) == floor(src) */
      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1; /* subtract the fractional part */
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* MOVAR: move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}
1296
1297 static void
1298 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1299 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1300 {
1301 /* dst = src0 * src1 + (1 - src0) * src2
1302 * => src0 * src1 - (src0 - 1) * src2
1303 * => src0 * src1 - (src0 * src2 - src2)
1304 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1305 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1306 */
1307 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1308 if (etna_src_uniforms_conflict(src[0], src[1]) ||
1309 etna_src_uniforms_conflict(src[0], src[2])) {
1310 src[0] = etna_mov_src(c, src[0]);
1311 }
1312
1313 struct etna_inst mad[2] = { };
1314 mad[0].opcode = INST_OPCODE_MAD;
1315 mad[0].sat = 0;
1316 mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1317 INST_COMPS_Z | INST_COMPS_W);
1318 mad[0].src[0] = src[0];
1319 mad[0].src[1] = src[2];
1320 mad[0].src[2] = negate(src[2]);
1321 mad[1].opcode = INST_OPCODE_MAD;
1322 mad[1].sat = inst->Instruction.Saturate;
1323 mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1324 mad[1].src[1] = src[1];
1325 mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1326
1327 emit_inst(c, &mad[0]);
1328 emit_inst(c, &mad[1]);
1329 }
1330
/* LIT: lighting coefficients, lowered to the sequence below. The SELECT
 * pairs clamp src.y to >= 0 and src.w to [-128, 128]; both can be folded
 * away when the operand is a compile-time uniform.
 *
 * SELECT.LT tmp._y__, 0, src.yyyy, 0
 *   - can be eliminated if src.y is a uniform and >= 0
 * SELECT.GT tmp.___w, 128, src.wwww, 128
 * SELECT.LT tmp.___w, -128, tmp.wwww, -128
 *   - can be eliminated if src.w is a uniform and fits clamp
 * LOG tmp.x, void, void, tmp.yyyy
 * MUL tmp.x, tmp.xxxx, tmp.wwww, void
 * LITP dst, undef, src.xxxx, tmp.xxxx
 */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* clamp src.y to >= 0, or fold the clamp for uniform operands */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* clamp src.w to [-128, 128], or fold the clamp for uniform operands */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* second SELECT reuses 'ins' with flipped condition and negated
       * bound to apply the lower clamp */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   struct etna_inst ins[3] = { };
   ins[0].opcode = INST_OPCODE_LOG;
   ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
   ins[0].src[2] = src_y;

   emit_inst(c, &ins[0]);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1408
/* SSG: sign(x). With native support this is a single SIGN op.
 * Otherwise:
 *   SET.NZ    tmp, src0, void
 *   SELECT.LZ dst, src0, -tmp, tmp
 * presumably SET yields 1.0 for nonzero lanes and SELECT.LZ picks the
 * negated value for negative src0 -- TODO confirm against the ISA docs.
 */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                            INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}
1442
/* SIN/COS/SCS. With native sin/cos support the argument is pre-scaled
 * by 2/pi into a temporary (the hardware op expects that scaling and,
 * on GC2000, src==dst fails) and a single SIN/COS is emitted. Without
 * native support a polynomial approximation is emitted instead. */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sin_cos_sqrt) {
      /* TGSI lowering should deal with SCS */
      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       * MAD t.x_zw, src.xxxx, A, B
       * FRC t.x_z_, void, void, t.xwzw
       * MAD t.x_z_, t.xwzw, 2, -1
       * MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs)
       * DP3 t.x_z_, t.zyww, C, void (for sin)
       * DP3 t.__z_, t.zyww, C, void (for scs)
       * MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs)
       * DP3 t.x_z_, t.xyww, C, void (for cos)
       * DP3 t.x___, t.xyww, C, void (for scs)
       * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       * MAD dst, t.ywyw, .2225, t.xzxz
       *
       * TODO: we don't set dst.zw correctly for SCS.
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded. Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which components feed the MUL/DP3 */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      /* SCS needs both the sin and the cos MUL/DP3 pairs */
      if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
         ins[5] = ins[3];
         ins[6] = ins[4];
         ins[4].dst.comps = INST_COMPS_X;
         ins[6].dst.comps = INST_COMPS_Z;
         ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
         ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
         ins[5].src[1] = absolute(ins[5].src[0]);
         p = &ins[7];
      } else {
         p = &ins[5];
      }

      /* final precision refinement: MAD + MAD with the 0.2225 constant */
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1573
1574 static void
1575 trans_dph(const struct instr_translater *t, struct etna_compile *c,
1576 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1577 {
1578 /*
1579 DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void
1580 ADD dst.xyzw, tmp.xyzw, void, src1.wwww
1581 */
1582 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1583 struct etna_inst ins[2] = { };
1584
1585 ins[0].opcode = INST_OPCODE_DP3;
1586 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1587 INST_COMPS_Z | INST_COMPS_W);
1588 ins[0].src[0] = src[0];
1589 ins[0].src[1] = src[1];
1590
1591 ins[1].opcode = INST_OPCODE_ADD;
1592 ins[1].sat = inst->Instruction.Saturate;
1593 ins[1].dst = convert_dst(c, &inst->Dst[0]);
1594 ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1595 ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
1596
1597 emit_inst(c, &ins[0]);
1598 emit_inst(c, &ins[1]);
1599 }
1600
/* TEX/TXB/TXL/TXP: texture sampling. Rectangle textures get their
 * coordinates rescaled first; TXP divides xyz by w before the lookup. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* temp.x = coord.x * (1/width); scale factor is a patched immediate */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      /* temp.y = coord.y * (1/height) */
      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}
1693
/* No-op translater for TGSI opcodes (NOP, END) that emit no hardware
 * instructions. */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}
1700
/* Per-TGSI-opcode dispatch table. Entries without a .fxn hit the BUG()
 * path in etna_compile_pass_generate_code. The .src array maps the TGSI
 * operand index to the hardware source slot (-1 = operand unused). */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_instr, .opc = INST_OPCODE_LOG, .src = {2, -1, -1}),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   /* control flow */
   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   /* ops needing multi-instruction lowering */
   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),
   INSTR(DPH, trans_dph),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),
   INSTR(SCS, trans_trig),

   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};
1765
1766 /* Pass -- compile instructions */
1767 static void
1768 etna_compile_pass_generate_code(struct etna_compile *c)
1769 {
1770 struct tgsi_parse_context ctx = { };
1771 unsigned status = tgsi_parse_init(&ctx, c->tokens);
1772 assert(status == TGSI_PARSE_OK);
1773
1774 int inst_idx = 0;
1775 while (!tgsi_parse_end_of_tokens(&ctx)) {
1776 const struct tgsi_full_instruction *inst = 0;
1777
1778 /* No inner temps used yet for this instruction, clear counter */
1779 c->inner_temps = 0;
1780
1781 tgsi_parse_token(&ctx);
1782
1783 switch (ctx.FullToken.Token.Type) {
1784 case TGSI_TOKEN_TYPE_INSTRUCTION:
1785 /* iterate over operands */
1786 inst = &ctx.FullToken.FullInstruction;
1787 if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1788 inst_idx++;
1789 continue;
1790 }
1791
1792 /* Lookup the TGSI information and generate the source arguments */
1793 struct etna_inst_src src[ETNA_NUM_SRC];
1794 memset(src, 0, sizeof(src));
1795
1796 const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1797
1798 for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1799 const struct tgsi_full_src_register *reg = &inst->Src[i];
1800 const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1801
1802 if (!n->valid || n->is_tex)
1803 continue;
1804
1805 src[i] = etna_create_src(reg, n);
1806 }
1807
1808 const unsigned opc = inst->Instruction.Opcode;
1809 const struct instr_translater *t = &translaters[opc];
1810
1811 if (t->fxn) {
1812 t->fxn(t, c, inst, src);
1813
1814 inst_idx += 1;
1815 } else {
1816 BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1817 assert(0);
1818 }
1819 break;
1820 }
1821 }
1822 tgsi_parse_free(&ctx);
1823 }
1824
1825 /* Look up register by semantic */
1826 static struct etna_reg_desc *
1827 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1828 {
1829 for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1830 struct etna_reg_desc *reg = &c->file[file].reg[idx];
1831
1832 if (reg->semantic.Name == name && reg->semantic.Index == index)
1833 return reg;
1834 }
1835
1836 return NULL; /* not found */
1837 }
1838
/** Add ADD and MUL instructions to bring Z from the -1..1 range to the
 * 0..1 range, when needed:
 * - this is a vertex shader
 * - and this is an older GPU (specs->vs_need_z_div)
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /* z' = (z + w) * 0.5, i.e. maps z from [-w, w] to [0, w]:
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
         });
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_MUL,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[1] = alloc_imm_f32(c, 0.5f),
         });
      }
   }
}
1871
1872 /** add a NOP to the shader if
1873 * a) the shader is empty
1874 * or
1875 * b) there is a label at the end of the shader
1876 */
1877 static void
1878 etna_compile_add_nop_if_needed(struct etna_compile *c)
1879 {
1880 bool label_at_last_inst = false;
1881
1882 for (int idx = 0; idx < c->labels_count; ++idx) {
1883 if (c->labels[idx].inst_idx == c->inst_ptr)
1884 label_at_last_inst = true;
1885
1886 }
1887
1888 if (c->inst_ptr == 0 || label_at_last_inst)
1889 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1890 }
1891
1892 static void
1893 assign_uniforms(struct etna_compile_file *file, unsigned base)
1894 {
1895 for (int idx = 0; idx < file->reg_size; ++idx) {
1896 file->reg[idx].native.valid = 1;
1897 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
1898 file->reg[idx].native.id = base + idx;
1899 }
1900 }
1901
/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
 * CONST must be consecutive as const buffers are supposed to be consecutive,
 * and before IMM, as this is
 * more convenient because is possible for the compilation process itself to
 * generate extra
 * immediates for constants such as pi, one, zero.
 */
static void
assign_constants_and_immediates(struct etna_compile *c)
{
   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
   /* immediates start after the constants; imm_base counts scalar slots
    * (4 per vec4 register), hence the *4 here and /4 below */
   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
         c->imm_size);
}
1919
1920 /* Assign declared samplers to native texture units */
1921 static void
1922 assign_texture_units(struct etna_compile *c)
1923 {
1924 uint tex_base = 0;
1925
1926 if (c->info.processor == PIPE_SHADER_VERTEX)
1927 tex_base = c->specs->vertex_sampler_offset;
1928
1929 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
1930 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
1931 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
1932 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
1933 }
1934 }
1935
/* Additional pass to fill in branch targets. This pass should be last
 * as no instruction reordering or removing/addition can be done anymore
 * once the branch targets are computed.
 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx])
         /* idx * 4: each assembled instruction appears to occupy 4 words
          * in c->code -- the label's placed position becomes the imm */
         etna_assemble_set_imm(&c->code[idx * 4], c->lbl_usage[idx]->inst_idx);
   }
}
1948
1949 /* compare two etna_native_reg structures, return true if equal */
1950 static bool
1951 cmp_etna_native_reg(const struct etna_native_reg to,
1952 const struct etna_native_reg from)
1953 {
1954 return to.valid == from.valid && to.is_tex == from.is_tex &&
1955 to.rgroup == from.rgroup && to.id == from.id;
1956 }
1957
1958 /* go through all declarations and swap native registers *to* and *from* */
1959 static void
1960 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
1961 const struct etna_native_reg from)
1962 {
1963 if (cmp_etna_native_reg(from, to))
1964 return; /* Nothing to do */
1965
1966 for (int idx = 0; idx < c->total_decls; ++idx) {
1967 if (cmp_etna_native_reg(c->decl[idx].native, from)) {
1968 c->decl[idx].native = to;
1969 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
1970 c->decl[idx].native = from;
1971 }
1972 }
1973 }
1974
1975 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
1976 * Semantic POS is always t0. If that semantic is not used, avoid t0.
1977 */
1978 static void
1979 permute_ps_inputs(struct etna_compile *c)
1980 {
1981 /* Special inputs:
1982 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
1983 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
1984 */
1985 uint native_idx = 1;
1986
1987 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
1988 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
1989 uint input_id;
1990 assert(reg->has_semantic);
1991
1992 if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
1993 continue;
1994
1995 input_id = native_idx++;
1996 swap_native_registers(c, etna_native_temp(input_id),
1997 c->file[TGSI_FILE_INPUT].reg[idx].native);
1998 }
1999
2000 c->num_varyings = native_idx - 1;
2001
2002 if (native_idx > c->next_free_native)
2003 c->next_free_native = native_idx;
2004 }
2005
2006 /* fill in ps inputs into shader object */
2007 static void
2008 fill_in_ps_inputs(struct etna_shader *sobj, struct etna_compile *c)
2009 {
2010 struct etna_shader_io_file *sf = &sobj->infile;
2011
2012 sf->num_reg = 0;
2013
2014 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2015 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2016
2017 if (reg->native.id > 0) {
2018 assert(sf->num_reg < ETNA_NUM_INPUTS);
2019 sf->reg[sf->num_reg].reg = reg->native.id;
2020 sf->reg[sf->num_reg].semantic = reg->semantic;
2021 /* convert usage mask to number of components (*=wildcard)
2022 * .r (0..1) -> 1 component
2023 * .*g (2..3) -> 2 component
2024 * .**b (4..7) -> 3 components
2025 * .***a (8..15) -> 4 components
2026 */
2027 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2028 sf->num_reg++;
2029 }
2030 }
2031
2032 assert(sf->num_reg == c->num_varyings);
2033 sobj->input_count_unk8 = 31; /* XXX what is this */
2034 }
2035
2036 /* fill in output mapping for ps into shader object */
2037 static void
2038 fill_in_ps_outputs(struct etna_shader *sobj, struct etna_compile *c)
2039 {
2040 sobj->outfile.num_reg = 0;
2041
2042 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2043 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2044
2045 switch (reg->semantic.Name) {
2046 case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2047 sobj->ps_color_out_reg = reg->native.id;
2048 break;
2049 case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2050 sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2051 break;
2052 default:
2053 assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2054 }
2055 }
2056 }
2057
2058 /* fill in inputs for vs into shader object */
2059 static void
2060 fill_in_vs_inputs(struct etna_shader *sobj, struct etna_compile *c)
2061 {
2062 struct etna_shader_io_file *sf = &sobj->infile;
2063
2064 sf->num_reg = 0;
2065 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2066 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2067 assert(sf->num_reg < ETNA_NUM_INPUTS);
2068 /* XXX exclude inputs with special semantics such as gl_frontFacing */
2069 sf->reg[sf->num_reg].reg = reg->native.id;
2070 sf->reg[sf->num_reg].semantic = reg->semantic;
2071 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2072 sf->num_reg++;
2073 }
2074
2075 sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2076 }
2077
2078 /* build two-level output index [Semantic][Index] for fast linking */
2079 static void
2080 build_output_index(struct etna_shader *sobj)
2081 {
2082 int total = 0;
2083 int offset = 0;
2084
2085 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2086 total += sobj->output_count_per_semantic[name];
2087
2088 sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2089
2090 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2091 sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2092 offset += sobj->output_count_per_semantic[name];
2093 }
2094
2095 for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2096 sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2097 [sobj->outfile.reg[idx].semantic.Index] =
2098 &sobj->outfile.reg[idx];
2099 }
2100 }
2101
/* fill in outputs for vs into shader object.
 * POSITION and PSIZE are routed to dedicated fields; every other output
 * becomes a varying entry. Also records per-semantic output counts (used by
 * build_output_index) and computes the hardware load-balancing word.
 */
static void
fill_in_vs_outputs(struct etna_shader *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         /* generic varying output */
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         /* track highest Index+1 seen per semantic, for the two-level index */
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   /* half_out cannot be 0: reg_size+1 rounds up, asserted below */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   /* NOTE(review): formula presumably derived from Vivante blob behavior;
    * assumes vertex_output_buffer_size > 2*half_out*vertex_cache_size so the
    * divisor stays positive — TODO confirm against specs initialization. */
   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   /* A and B are clamped to a byte; C and D appear fixed */
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}
2161
2162 static bool
2163 etna_compile_check_limits(struct etna_compile *c)
2164 {
2165 int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
2166 ? c->specs->max_vs_uniforms
2167 : c->specs->max_ps_uniforms;
2168 /* round up number of uniforms, including immediates, in units of four */
2169 int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
2170
2171 if (c->inst_ptr > c->specs->max_instructions) {
2172 DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
2173 c->specs->max_instructions);
2174 return false;
2175 }
2176
2177 if (c->next_free_native > c->specs->max_registers) {
2178 DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
2179 c->specs->max_registers);
2180 return false;
2181 }
2182
2183 if (num_uniforms > max_uniforms) {
2184 DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
2185 max_uniforms);
2186 return false;
2187 }
2188
2189 if (c->num_varyings > c->specs->max_varyings) {
2190 DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
2191 c->specs->max_varyings);
2192 return false;
2193 }
2194
2195 if (c->imm_base > c->specs->num_constants) {
2196 DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
2197 c->specs->num_constants);
2198 }
2199
2200 return true;
2201 }
2202
2203 static void
2204 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader *sobj)
2205 {
2206 uint32_t count = c->imm_size;
2207 struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2208
2209 uinfo->const_count = c->imm_base;
2210 uinfo->imm_count = count;
2211 uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2212 uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2213
2214 etna_set_shader_uniforms_dirty_flags(sobj);
2215 }
2216
/* Compile a TGSI token stream into an etna_shader for the given chip specs.
 * Drives the whole pipeline: TGSI lowering, declaration parsing, usage
 * analysis, register assignment, code generation and limit checking.
 * Returns a newly allocated shader object (caller owns it, free with
 * etna_destroy_shader), or NULL on allocation failure or when hardware
 * limits are exceeded.
 */
struct etna_shader *
etna_compile_shader(const struct etna_specs *specs,
                    const struct tgsi_token *tokens)
{
   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;
   struct etna_shader *shader;

   /* lower unsupported TGSI opcodes depending on chip capabilities */
   struct tgsi_lowering_config lconfig = {
      .lower_SCS = specs->has_sin_cos_sqrt,
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = true,
      .lower_DP2A = true,
      .lower_TRUNC = true,
      .lower_XPD = true
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return NULL;

   shader = CALLOC_STRUCT(etna_shader);
   if (!shader)
      goto out;

   c->specs = specs;
   /* lowering returns NULL when no lowering was needed; track ownership so
    * the lowered copy is freed at "out" */
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
    * this is part of RGROUP_INTERNAL
    */

   /* assign inputs: last usage of input should be <= first usage of temp */
   /* potential optimization case:
    *   if single MOV TEMP[y], IN[x] before which temp y is not used, and
    * after which IN[x]
    *   is not read, temp[y] can be used as input register as-is
    */
   /*   sort temporaries by first use
    *   sort inputs by last usage
    *   iterate over inputs, temporaries
    *     if last usage of input <= first usage of temp:
    *       assign input to temp
    *       advance input, temporary pointer
    *     else
    *       advance temporary pointer
    *
    *   potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input;
    *   however, as the temp is not used before this, how would this make
    * sense? uninitialized temporaries have an undefined
    *   value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /*   potential optimization case:
    *     if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in
    *     the shader) after which temp y is no longer used temp[y] can be
    * used as output register as-is
    *
    *   potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output;
    *   however, as the temp is not used after this, how would this make
    * sense? could just discard the output value
    */
   /*   sort temporaries by last use
    *   sort outputs by first usage
    *   iterate over outputs, temporaries
    *     if first usage of output >= last usage of temp:
    *       assign output to temp
    *       advance output, temporary pointer
    *     else
    *       advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations (debug dump of register assignment before PS input
    * permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed
    * from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations again (same dump, now showing state after the PS
    * input permutation above) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_add_nop_if_needed(c);
   etna_compile_fill_in_labels(c);

   ret = etna_compile_check_limits(c);
   if (!ret) {
      FREE(shader);
      shader = NULL;
      goto out;
   }

   /* fill in output structure */
   shader->processor = c->info.processor;
   /* code_size counts 32-bit words (4 per instruction); mem_dup copies
    * bytes (16 per instruction) */
   shader->code_size = c->inst_ptr * 4;
   shader->code = mem_dup(c->code, c->inst_ptr * 16);
   shader->num_temps = c->next_free_native;
   /* -1 marks "not present"; filled in by the fill_in_* helpers below */
   shader->vs_pos_out_reg = -1;
   shader->vs_pointsize_out_reg = -1;
   shader->ps_color_out_reg = -1;
   shader->ps_depth_out_reg = -1;
   copy_uniform_state_to_shader(c, shader);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(shader, c);
      fill_in_vs_outputs(shader, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(shader, c);
      fill_in_ps_outputs(shader, c);
   }

out:
   /* free the lowered token copy (if any) and all compile scratch state;
    * shader is NULL here on failure paths */
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return shader;
}
2410
extern const char *tgsi_swizzle_names[];

/* Dump a compiled shader to stdout for debugging: disassembly, uniform and
 * immediate contents, input/output mappings and the special registers.
 */
void
etna_dump_shader(const struct etna_shader *shader)
{
   if (shader->processor == PIPE_SHADER_VERTEX)
      printf("VERT\n");
   else
      printf("FRAG\n");


   etna_disasm(shader->code, shader->code_size, PRINT_RAW);

   printf("num temps: %i\n", shader->num_temps);
   printf("num const: %i\n", shader->uniforms.const_count);
   printf("immediates:\n");
   for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
      /* immediates are scalar; /4 and %4 recover the vec4 slot and swizzle.
       * NOTE(review): float reinterpret via pointer cast — relies on the
       * compiler tolerating the aliasing; memcpy would be strictly safe. */
      printf(" [%i].%s = %f (0x%08x)\n",
             (idx + shader->uniforms.const_count) / 4,
             tgsi_swizzle_names[idx % 4],
             *((float *)&shader->uniforms.imm_data[idx]),
             shader->uniforms.imm_data[idx]);
   }
   printf("inputs:\n");
   for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
             tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
             shader->infile.reg[idx].semantic.Index,
             shader->infile.reg[idx].num_components);
   }
   printf("outputs:\n");
   for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
      printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
             tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
             shader->outfile.reg[idx].semantic.Index,
             shader->outfile.reg[idx].num_components);
   }
   printf("special:\n");
   if (shader->processor == PIPE_SHADER_VERTEX) {
      printf("  vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
      printf("  vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
      printf("  vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
   } else {
      printf("  ps_color_out_reg=%i\n", shader->ps_color_out_reg);
      printf("  ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
   }
   printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
}
2458
2459 void
2460 etna_destroy_shader(struct etna_shader *shader)
2461 {
2462 assert(shader);
2463
2464 FREE(shader->code);
2465 FREE(shader->uniforms.imm_data);
2466 FREE(shader->uniforms.imm_contents);
2467 FREE(shader->output_per_semantic_list);
2468 FREE(shader);
2469 }
2470
2471 static const struct etna_shader_inout *
2472 etna_shader_vs_lookup(const struct etna_shader *sobj,
2473 const struct etna_shader_inout *in)
2474 {
2475 if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2476 return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2477
2478 return NULL;
2479 }
2480
/* Link a vertex and fragment shader: for every FS input find the matching VS
 * output and fill in the varying table in *info.
 * Returns true on LINK ERROR (an FS input with no matching VS output),
 * false on success — note the inverted convention.
 */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader *vs, const struct etna_shader *fs)
{
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;

      /* FS input registers are 1-based (t0 holds POSITION) */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      /* point coordinates are synthesized by the rasterizer, not produced by
       * the VS, so they never need a matching VS output */
      if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
         varying->use[2] = VARYING_COMPONENT_USE_USED;
         varying->use[3] = VARYING_COMPONENT_USE_USED;
         varying->reg = 0; /* replaced by point coord -- doesn't matter */
         continue;
      }

      if (vsio == NULL)
         return true; /* not found -- link error */

      varying->use[0] = VARYING_COMPONENT_USE_USED;
      varying->use[1] = VARYING_COMPONENT_USE_USED;
      varying->use[2] = VARYING_COMPONENT_USE_USED;
      varying->use[3] = VARYING_COMPONENT_USE_USED;
      varying->reg = vsio->reg;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}