Revert "etnaviv: add support for snorm textures"
[mesa.git] / src / gallium / drivers / etnaviv / etnaviv_compiler.c
1 /*
2 * Copyright (c) 2012-2015 Etnaviv Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Wladimir J. van der Laan <laanwj@gmail.com>
25 */
26
27 /* TGSI->Vivante shader ISA conversion */
28
29 /* What does the compiler return (see etna_shader_object)?
30 * 1) instruction data
31 * 2) input-to-temporary mapping (fixed for ps)
32 * *) in case of ps, semantic -> varying id mapping
33 * *) for each varying: number of components used (r, rg, rgb, rgba)
34 * 3) temporary-to-output mapping (in case of vs, fixed for ps)
35 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36 * 5) immediates base offset, immediates data
37 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38 * configure the hw, but useful for error checking
39 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
40 * (output reg id is enough)
41 *
42 * Empty shaders are not allowed, should always at least generate a NOP. Also
43 * if there is a label at the end of the shader, an extra NOP should be
44 * generated as jump target.
45 *
46 * TODO
47 * * Use an instruction scheduler
48 * * Indirect access to uniforms / temporaries using amode
49 */
50
51 #include "etnaviv_compiler.h"
52
53 #include "etnaviv_asm.h"
54 #include "etnaviv_context.h"
55 #include "etnaviv_debug.h"
56 #include "etnaviv_disasm.h"
57 #include "etnaviv_uniforms.h"
58 #include "etnaviv_util.h"
59
60 #include "pipe/p_shader_tokens.h"
61 #include "tgsi/tgsi_info.h"
62 #include "tgsi/tgsi_iterate.h"
63 #include "tgsi/tgsi_lowering.h"
64 #include "tgsi/tgsi_strings.h"
65 #include "tgsi/tgsi_util.h"
66 #include "util/u_math.h"
67 #include "util/u_memory.h"
68
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <sys/stat.h>
72 #include <sys/types.h>
73
/* Maximum number of scratch temporaries that may be requested through
 * etna_compile_get_inner_temp() while translating one TGSI instruction. */
#define ETNA_MAX_INNER_TEMPS 2

/* Constant table uploaded as two vec4 immediates; the 1/(2*PI) entry
 * suggests it feeds the SIN/COS expansion sequence — the consuming code is
 * outside this chunk, so confirm against the trig lowering before relying
 * on the exact meaning of each component. */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};
84
/* Native register description structure: identifies either a texture
 * sampler unit or a hardware register within a register group. */
struct etna_native_reg {
   unsigned valid : 1;  /* 0 = no native register assigned yet */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* INST_RGROUP_* register group (temp/uniform/...) */
   unsigned id : 9;     /* register or sampler index within the group */
};
92
/* Register description: per-register bookkeeping for one TGSI register,
 * including its liveness range and the native register it maps to. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use; /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel (x/y/z/w read bits) */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};
107
/* Label information structure: a label is created unplaced (inst_idx == -1)
 * and later pointed at a concrete instruction by label_place(). */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to */
};
112
/* Kind of control-flow nesting frame on the compile-time frame stack. */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* BGNLOOP/ENDLOOP */
};
117
/* nesting scope frame (LOOP, IF, ...) during compilation.
 * Only the label indices relevant to the frame's type are used; the others
 * stay unset. */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;     /* IF: jump target of the ELSE branch */
   int lbl_endif_idx;    /* IF: jump target past ENDIF */
   int lbl_loop_bgn_idx; /* LOOP: back-edge target */
   int lbl_loop_end_idx; /* LOOP: break target */
};
127
/* Per-TGSI-file register table (a slice of etna_compile::decl). */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index */
   struct etna_reg_desc *reg;
};
134
/* Append 'val' to dynamic array 'arr', growing it (doubling, minimum 16
 * entries) when full. Requires companion variables arr##_count (entries in
 * use) and arr##_sz (entries allocated) to be in scope.
 *
 * Fix: check the realloc() result through a temporary. The previous
 * version assigned realloc() straight back to 'arr', so an allocation
 * failure both leaked the old array and dereferenced NULL on the store
 * below. On failure we assert, matching the file's OOM policy elsewhere
 * (e.g. alloc_imm's assert on ETNA_MAX_IMM). */
#define array_insert(arr, val)                                        \
   do {                                                               \
      if (arr##_count == arr##_sz) {                                  \
         size_t new_sz_ = MAX2(2 * arr##_sz, 16);                     \
         void *new_arr_ = realloc(arr, new_sz_ * sizeof(arr[0]));     \
         assert(new_arr_);                                            \
         arr = new_arr_;                                              \
         arr##_sz = new_sz_;                                          \
      }                                                               \
      arr[arr##_count++] = val;                                       \
   } while (0)
143
144
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* whether 'tokens' is owned by us and must be freed */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations; file[].reg points into this */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data, built up as vec4 uniform slots */
   enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp;
   /* per-instruction label reference, filled in by label_mark_use() */
   int lbl_usage[ETNA_MAX_INSTRUCTIONS];

   unsigned labels_count, labels_sz; /* companion vars for array_insert */
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};
202
203 static struct etna_reg_desc *
204 etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205 {
206 return &c->file[dst.File].reg[dst.Index];
207 }
208
209 static struct etna_reg_desc *
210 etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211 {
212 return &c->file[src.File].reg[src.Index];
213 }
214
215 static struct etna_native_reg
216 etna_native_temp(unsigned reg)
217 {
218 return (struct etna_native_reg) {
219 .valid = 1,
220 .rgroup = INST_RGROUP_TEMP,
221 .id = reg
222 };
223 }
224
/** Register allocation **/
/* Sort criteria for sort_registers(): by first or last use, ascending or
 * descending instruction index. */
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};
232
/* Augmented register description for sorting */
struct sort_rec {
   struct etna_reg_desc *ptr; /* register being indexed */
   int key;                   /* sort key derived from first/last use */
};

/* qsort() comparator: orders sort_recs by ascending key. */
static int
sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
{
   /* canonical branch-free three-way compare: -1, 0 or 1 */
   return (a->key > b->key) - (a->key < b->key);
}
250
251 /* create an index on a register set based on certain criteria. */
252 static int
253 sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
254 enum reg_sort_order so)
255 {
256 struct etna_reg_desc *regs = file->reg;
257 int ptr = 0;
258
259 /* pre-populate keys from active registers */
260 for (int idx = 0; idx < file->reg_size; ++idx) {
261 /* only interested in active registers now; will only assign inactive ones
262 * if no space in active ones */
263 if (regs[idx].active) {
264 sorted[ptr].ptr = &regs[idx];
265
266 switch (so) {
267 case FIRST_USE_ASC:
268 sorted[ptr].key = regs[idx].first_use;
269 break;
270 case LAST_USE_ASC:
271 sorted[ptr].key = regs[idx].last_use;
272 break;
273 case FIRST_USE_DESC:
274 sorted[ptr].key = -regs[idx].first_use;
275 break;
276 case LAST_USE_DESC:
277 sorted[ptr].key = -regs[idx].last_use;
278 break;
279 }
280 ptr++;
281 }
282 }
283
284 /* sort index by key */
285 qsort(sorted, ptr, sizeof(struct sort_rec),
286 (int (*)(const void *, const void *))sort_rec_compar);
287
288 return ptr;
289 }
290
291 /* Allocate a new, unused, native temp register */
292 static struct etna_native_reg
293 alloc_new_native_reg(struct etna_compile *c)
294 {
295 assert(c->next_free_native < ETNA_MAX_TEMPS);
296 return etna_native_temp(c->next_free_native++);
297 }
298
299 /* assign TEMPs to native registers */
300 static void
301 assign_temporaries_to_native(struct etna_compile *c,
302 struct etna_compile_file *file)
303 {
304 struct etna_reg_desc *temps = file->reg;
305
306 for (int idx = 0; idx < file->reg_size; ++idx)
307 temps[idx].native = alloc_new_native_reg(c);
308 }
309
/* assign inputs and outputs to temporaries.
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible: an input can share a temp
 * whose first use starts after the input's last use; an output can share a
 * temp whose last use ends before the output's first use. The two sorted
 * index arrays are merged with a classic two-pointer sweep. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   /* inputs sorted by end of life, temps by start of life (and the mirror
    * ordering for outputs) so one linear pass can pair them up */
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}
357
358 /* Allocate an immediate with a certain value and return the index. If
359 * there is already an immediate with that value, return that.
360 */
361 static struct etna_inst_src
362 alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
363 uint32_t value)
364 {
365 int idx;
366
367 /* Could use a hash table to speed this up */
368 for (idx = 0; idx < c->imm_size; ++idx) {
369 if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
370 break;
371 }
372
373 /* look if there is an unused slot */
374 if (idx == c->imm_size) {
375 for (idx = 0; idx < c->imm_size; ++idx) {
376 if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
377 break;
378 }
379 }
380
381 /* allocate new immediate */
382 if (idx == c->imm_size) {
383 assert(c->imm_size < ETNA_MAX_IMM);
384 idx = c->imm_size++;
385 c->imm_data[idx] = value;
386 c->imm_contents[idx] = contents;
387 }
388
389 /* swizzle so that component with value is returned in all components */
390 idx += c->imm_base;
391 struct etna_inst_src imm_src = {
392 .use = 1,
393 .rgroup = INST_RGROUP_UNIFORM_0,
394 .reg = idx / 4,
395 .swiz = INST_SWIZ_BROADCAST(idx & 3)
396 };
397
398 return imm_src;
399 }
400
/* Allocate (or reuse) an immediate holding the 32-bit constant 'value'. */
static struct etna_inst_src
alloc_imm_u32(struct etna_compile *c, uint32_t value)
{
   return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
}
406
/* Allocate a full vec4 of immediates with the given four 32-bit values,
 * aligned to a uniform register boundary so it can be addressed with an
 * identity swizzle. Reuses an existing aligned vec4 with identical
 * contents when one exists. */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* scan existing vec4-aligned groups for an exact match */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* no match: append a new vec4, padding imm_size up to a multiple of 4 */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}
444
445 static uint32_t
446 get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
447 unsigned swiz_idx)
448 {
449 assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
450 unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
451
452 return c->imm_data[idx];
453 }
454
/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that. The float is stored by its raw
 * bit pattern (fui). */
static struct etna_inst_src
alloc_imm_f32(struct etna_compile *c, float value)
{
   return alloc_imm_u32(c, fui(value));
}
463
464 static struct etna_inst_src
465 etna_imm_vec4f(struct etna_compile *c, const float *vec4)
466 {
467 uint32_t val[4];
468
469 for (int i = 0; i < 4; i++)
470 val[i] = fui(vec4[i]);
471
472 return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
473 }
474
/* Pass -- check register file declarations and immediates.
 * Walks the token stream once and copies all TGSI immediates into the
 * compile context's immediate area (four 32-bit components each). */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}
507
508 /* Allocate register declarations for the registers in all register files */
509 static void
510 etna_allocate_decls(struct etna_compile *c)
511 {
512 uint idx = 0;
513
514 for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
515 c->file[x].reg = &c->decl[idx];
516 c->file[x].reg_size = c->info.file_max[x] + 1;
517
518 for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
519 c->decl[idx].file = x;
520 c->decl[idx].idx = sub;
521 idx++;
522 }
523 }
524
525 c->total_decls = idx;
526 }
527
/* Pass -- check and record usage of temporaries, inputs, outputs.
 * Resets all declarations to inactive, then walks the token stream once,
 * recording for every register its first/last instruction of use and the
 * per-channel read mask. These liveness ranges later drive the input/output
 * to temporary reuse in assign_inouts_to_temporaries(). */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* find out max register #s used.
       * For every register mark first and last instruction index where it's
       * used; this allows finding ranges where a temporary can be borrowed
       * as input and/or output register.
       *
       * XXX in the case of loops this needs special care, or even be
       * completely disabled: the last usage of a register inside a loop
       * means it can still be used on the next loop iteration (execution is
       * no longer chronological), so it is only actually free after the
       * loop finishes. Same for inputs: the first usage inside a loop does
       * not mean the register wasn't overwritten in a previous iteration,
       * so it is only free before the loop starts. The proper way would be
       * full dominator / post-dominator analysis (especially with more
       * complicated control flow such as direct branch instructions), but
       * not for now...
       */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register, this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}
618
619 /* assign inputs that need to be assigned to specific registers */
620 static void
621 assign_special_inputs(struct etna_compile *c)
622 {
623 if (c->info.processor == PIPE_SHADER_FRAGMENT) {
624 /* never assign t0 as it is the position output, start assigning at t1 */
625 c->next_free_native = 1;
626
627 /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
628 for (int idx = 0; idx < c->total_decls; ++idx) {
629 struct etna_reg_desc *reg = &c->decl[idx];
630
631 if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
632 reg->native = etna_native_temp(0);
633 }
634 }
635 }
636
637 /* Check that a move instruction does not swizzle any of the components
638 * that it writes.
639 */
640 static bool
641 etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
642 const struct tgsi_src_register src)
643 {
644 return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
645 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
646 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
647 (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
648 }
649
/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough).
 * Eliminated MOVs are flagged in c->dead_inst[] and the output register
 * aliases the source's native register instead.
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   unsigned status = TGSI_PARSE_OK;
   status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction,
             * allocate a new register, and associate both input and output to
             * it
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}
738
739 /* Get a temporary to be used within one TGSI instruction.
740 * The first time that this function is called the temporary will be allocated.
741 * Each call to this function will return the same temporary.
742 */
743 static struct etna_native_reg
744 etna_compile_get_inner_temp(struct etna_compile *c)
745 {
746 int inner_temp = c->inner_temps;
747
748 if (inner_temp < ETNA_MAX_INNER_TEMPS) {
749 if (!c->inner_temp[inner_temp].valid)
750 c->inner_temp[inner_temp] = alloc_new_native_reg(c);
751
752 /* alloc_new_native_reg() handles lack of registers */
753 c->inner_temps += 1;
754 } else {
755 BUG("Too many inner temporaries (%i) requested in one instruction",
756 inner_temp + 1);
757 }
758
759 return c->inner_temp[inner_temp];
760 }
761
762 static struct etna_inst_dst
763 etna_native_to_dst(struct etna_native_reg native, unsigned comps)
764 {
765 /* Can only assign to temporaries */
766 assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
767
768 struct etna_inst_dst rv = {
769 .comps = comps,
770 .use = 1,
771 .reg = native.id,
772 };
773
774 return rv;
775 }
776
777 static struct etna_inst_src
778 etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
779 {
780 assert(native.valid && !native.is_tex);
781
782 struct etna_inst_src rv = {
783 .use = 1,
784 .swiz = swizzle,
785 .rgroup = native.rgroup,
786 .reg = native.id,
787 .amode = INST_AMODE_DIRECT,
788 };
789
790 return rv;
791 }
792
793 static inline struct etna_inst_src
794 negate(struct etna_inst_src src)
795 {
796 src.neg = !src.neg;
797
798 return src;
799 }
800
/* Set the absolute-value modifier on a source operand (pass-by-value). */
static inline struct etna_inst_src
absolute(struct etna_inst_src src)
{
   src.abs = 1;

   return src;
}
808
/* Compose an extra swizzle on top of a source operand's existing swizzle
 * (pass-by-value). */
static inline struct etna_inst_src
swizzle(struct etna_inst_src src, unsigned swizzle)
{
   src.swiz = inst_swiz_compose(src.swiz, swizzle);

   return src;
}
816
817 /* Emit instruction and append it to program */
818 static void
819 emit_inst(struct etna_compile *c, struct etna_inst *inst)
820 {
821 assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
822
823 /* Check for uniform conflicts (each instruction can only access one
824 * uniform),
825 * if detected, use an intermediate temporary */
826 unsigned uni_rgroup = -1;
827 unsigned uni_reg = -1;
828
829 for (int src = 0; src < ETNA_NUM_SRC; ++src) {
830 if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
831 if (uni_reg == -1) { /* first unique uniform used */
832 uni_rgroup = inst->src[src].rgroup;
833 uni_reg = inst->src[src].reg;
834 } else { /* second or later; check that it is a re-use */
835 if (uni_rgroup != inst->src[src].rgroup ||
836 uni_reg != inst->src[src].reg) {
837 DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
838 "accesses different uniforms, "
839 "need to generate extra MOV");
840 struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
841
842 /* Generate move instruction to temporary */
843 etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
844 .opcode = INST_OPCODE_MOV,
845 .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
846 INST_COMPS_Z | INST_COMPS_W),
847 .src[2] = inst->src[src]
848 });
849
850 c->inst_ptr++;
851
852 /* Modify instruction to use temp register instead of uniform */
853 inst->src[src].use = 1;
854 inst->src[src].rgroup = INST_RGROUP_TEMP;
855 inst->src[src].reg = inner_temp.id;
856 inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
857 inst->src[src].neg = 0; /* negation happens on MOV */
858 inst->src[src].abs = 0; /* abs happens on MOV */
859 inst->src[src].amode = 0; /* amode effects happen on MOV */
860 }
861 }
862 }
863 }
864
865 /* Finally assemble the actual instruction */
866 etna_assemble(&c->code[c->inst_ptr * 4], inst);
867 c->inst_ptr++;
868 }
869
870 static unsigned int
871 etna_amode(struct tgsi_ind_register indirect)
872 {
873 assert(indirect.File == TGSI_FILE_ADDRESS);
874 assert(indirect.Index == 0);
875
876 switch (indirect.Swizzle) {
877 case TGSI_SWIZZLE_X:
878 return INST_AMODE_ADD_A_X;
879 case TGSI_SWIZZLE_Y:
880 return INST_AMODE_ADD_A_Y;
881 case TGSI_SWIZZLE_Z:
882 return INST_AMODE_ADD_A_Z;
883 case TGSI_SWIZZLE_W:
884 return INST_AMODE_ADD_A_W;
885 default:
886 assert(!"Invalid swizzle");
887 }
888
889 unreachable("bad swizzle");
890 }
891
892 /* convert destination operand */
893 static struct etna_inst_dst
894 convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
895 {
896 struct etna_inst_dst rv = {
897 /// XXX .amode
898 .comps = in->Register.WriteMask,
899 };
900
901 if (in->Register.File == TGSI_FILE_ADDRESS) {
902 assert(in->Register.Index == 0);
903 rv.reg = in->Register.Index;
904 rv.use = 0;
905 } else {
906 rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
907 in->Register.WriteMask);
908 }
909
910 if (in->Register.Indirect)
911 rv.amode = etna_amode(in->Indirect);
912
913 return rv;
914 }
915
916 /* convert texture operand */
917 static struct etna_inst_tex
918 convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
919 const struct tgsi_instruction_texture *tex)
920 {
921 struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
922 struct etna_inst_tex rv = {
923 // XXX .amode (to allow for an array of samplers?)
924 .swiz = INST_SWIZ_IDENTITY
925 };
926
927 assert(native_reg.is_tex && native_reg.valid);
928 rv.id = native_reg.id;
929
930 return rv;
931 }
932
933 /* convert source operand */
934 static struct etna_inst_src
935 etna_create_src(const struct tgsi_full_src_register *tgsi,
936 const struct etna_native_reg *native)
937 {
938 const struct tgsi_src_register *reg = &tgsi->Register;
939 struct etna_inst_src rv = {
940 .use = 1,
941 .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
942 .neg = reg->Negate,
943 .abs = reg->Absolute,
944 .rgroup = native->rgroup,
945 .reg = native->id,
946 .amode = INST_AMODE_DIRECT,
947 };
948
949 assert(native->valid && !native->is_tex);
950
951 if (reg->Indirect)
952 rv.amode = etna_amode(tgsi->Indirect);
953
954 return rv;
955 }
956
957 static struct etna_inst_src
958 etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
959 struct etna_native_reg temp)
960 {
961 struct etna_inst mov = { };
962
963 mov.opcode = INST_OPCODE_MOV;
964 mov.sat = 0;
965 mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
966 INST_COMPS_Z | INST_COMPS_W);
967 mov.src[2] = src;
968 emit_inst(c, &mov);
969
970 src.swiz = INST_SWIZ_IDENTITY;
971 src.neg = src.abs = 0;
972 src.rgroup = temp.rgroup;
973 src.reg = temp.id;
974
975 return src;
976 }
977
/* Copy 'src' into a freshly requested inner temporary and return a source
 * operand referencing that temp. */
static struct etna_inst_src
etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);

   return etna_mov_src_to_temp(c, src, temp);
}
985
986 static bool
987 etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
988 {
989 return etna_rgroup_is_uniform(a.rgroup) &&
990 etna_rgroup_is_uniform(b.rgroup) &&
991 (a.rgroup != b.rgroup || a.reg != b.reg);
992 }
993
994 /* create a new label */
995 static unsigned int
996 alloc_new_label(struct etna_compile *c)
997 {
998 struct etna_compile_label label = {
999 .inst_idx = -1, /* start by point to no specific instruction */
1000 };
1001
1002 array_insert(c->labels, label);
1003
1004 return c->labels_count - 1;
1005 }
1006
/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   label->inst_idx = c->inst_ptr;
}
1013
/* mark label use at current instruction.
 * The target of the label will be filled into the marked instruction's
 * src2.imm slot as soon as the value becomes known (after all labels have
 * been placed).
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}
1025
1026 /* walk the frame stack and return first frame with matching type */
1027 static struct etna_compile_frame *
1028 find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1029 {
1030 for (int sp = c->frame_sp; sp >= 0; sp--)
1031 if (c->frame_stack[sp].type == type)
1032 return &c->frame_stack[sp];
1033
1034 assert(0);
1035 return NULL;
1036 }
1037
/* Table entry describing how one TGSI opcode is translated into native
 * instruction(s); see the 'translaters' table below. */
struct instr_translater {
   /* emit callback; receives the pre-converted source operands */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* TGSI_OPCODE_* handled by this entry */
   uint8_t opc;       /* native INST_OPCODE_*, when fxn uses it */

   /* tgsi src index -> native src slot; -1 = slot unused
    * (see trans_instr, which asserts on -1 before routing) */
   int src[3];

   unsigned cond;     /* native INST_CONDITION_*, when fxn uses it */
};
1050
1051 static void
1052 trans_instr(const struct instr_translater *t, struct etna_compile *c,
1053 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1054 {
1055 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
1056 struct etna_inst instr = { };
1057
1058 instr.opcode = t->opc;
1059 instr.cond = t->cond;
1060 instr.sat = inst->Instruction.Saturate;
1061
1062 assert(info->num_dst <= 1);
1063 if (info->num_dst)
1064 instr.dst = convert_dst(c, &inst->Dst[0]);
1065
1066 assert(info->num_src <= ETNA_NUM_SRC);
1067
1068 for (unsigned i = 0; i < info->num_src; i++) {
1069 int swizzle = t->src[i];
1070
1071 assert(swizzle != -1);
1072 instr.src[swizzle] = src[i];
1073 }
1074
1075 emit_inst(c, &instr);
1076 }
1077
1078 static void
1079 trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1080 const struct tgsi_full_instruction *inst,
1081 struct etna_inst_src *src)
1082 {
1083 emit_inst(c, &(struct etna_inst) {
1084 .opcode = INST_OPCODE_SELECT,
1085 .cond = t->cond,
1086 .sat = inst->Instruction.Saturate,
1087 .dst = convert_dst(c, &inst->Dst[0]),
1088 .src[0] = src[0],
1089 .src[1] = src[1],
1090 .src[2] = src[0],
1091 });
1092 }
1093
/* IF: open an IF frame and branch to the (not-yet-placed) "else" label when
 * the condition is zero. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else_idx = alloc_new_label(c);
   f->lbl_endif_idx = -1; /* no ELSE seen yet; set by trans_else if one appears */

   /* We need to avoid the emit_inst() below becoming two instructions:
    * src[0] and the 0.0 immediate may live in different uniform registers,
    * which a single instruction cannot read (see etna_src_uniforms_conflict). */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else_idx);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}
1124
/* ELSE: terminate the "then" side with an unconditional jump to a new
 * "endif" label, and place the "else" label here. */
static void
trans_else(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   /* peek (not pop) the IF frame opened by trans_if */
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* create "endif" label, and branch to endif label */
   f->lbl_endif_idx = alloc_new_label(c);
   label_mark_use(c, f->lbl_endif_idx);
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      /* imm is filled in later */
   });

   /* mark "else" label at this position in instruction stream */
   label_place(c, &c->labels[f->lbl_else_idx]);
}
1145
/* ENDIF: pop the IF frame and place its pending label here. */
static void
trans_endif(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   /* pop the IF frame opened by trans_if */
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_IF);

   /* assign "endif" or "else" (if no ELSE) label to current position in
    * instruction stream, pop IF */
   if (f->lbl_endif_idx != -1)
      label_place(c, &c->labels[f->lbl_endif_idx]);
   else
      label_place(c, &c->labels[f->lbl_else_idx]);
}
1161
1162 static void
1163 trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1164 const struct tgsi_full_instruction *inst,
1165 struct etna_inst_src *src)
1166 {
1167 struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1168
1169 /* push LOOP to stack */
1170 f->type = ETNA_COMPILE_FRAME_LOOP;
1171 f->lbl_loop_bgn_idx = alloc_new_label(c);
1172 f->lbl_loop_end_idx = alloc_new_label(c);
1173
1174 label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1175
1176 c->num_loops++;
1177 }
1178
/* ENDLOOP: pop the LOOP frame, branch back to loop_bgn, and place the
 * loop_end label (the BRK target) here. */
static void
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
   assert(f->type == ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create unconditional branch back to the loop_bgn label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });

   /* loop exit point; BRK branches here */
   label_place(c, &c->labels[f->lbl_loop_end_idx]);
}
1202
/* BRK: unconditional branch to the innermost loop's end label. */
static void
trans_brk(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   /* find the innermost enclosing LOOP frame */
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_end_idx);

   /* create branch to loop_end label */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1222
/* CONT: unconditional branch back to the innermost loop's begin label. */
static void
trans_cont(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   assert(c->frame_sp > 0);
   /* find the innermost enclosing LOOP frame */
   struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_loop_bgn_idx);

   /* create branch to loop_bgn label (the original comment said loop_end,
    * but the marked label above is loop_bgn: continue restarts the loop) */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_TRUE,
      .src[0] = src[0],
      /* imm is filled in later */
   });
}
1242
1243 static void
1244 trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1245 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1246 {
1247 emit_inst(c, &(struct etna_inst) {
1248 .opcode = t->opc,
1249 .sat = inst->Instruction.Saturate,
1250 .dst = convert_dst(c, &inst->Dst[0]),
1251 .src[0] = src[0],
1252 .src[2] = src[0],
1253 });
1254 }
1255
/* ARL: floor the source into a temp (native FLOOR where available, else
 * emulated with FRC+ADD), then MOVAR the result into the address register. */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                            INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      /* single native FLOOR into the temp */
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* emulate floor(x) = x - frac(x):
       * FRC temp, src; ADD temp, src, -temp */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      /* second operand: the negated FRC result read back from the temp */
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1;
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}
1304
1305 static void
1306 trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1307 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1308 {
1309 /* dst = src0 * src1 + (1 - src0) * src2
1310 * => src0 * src1 - (src0 - 1) * src2
1311 * => src0 * src1 - (src0 * src2 - src2)
1312 * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1313 * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1314 */
1315 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1316 if (etna_src_uniforms_conflict(src[0], src[1]) ||
1317 etna_src_uniforms_conflict(src[0], src[2])) {
1318 src[0] = etna_mov_src(c, src[0]);
1319 }
1320
1321 struct etna_inst mad[2] = { };
1322 mad[0].opcode = INST_OPCODE_MAD;
1323 mad[0].sat = 0;
1324 mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1325 INST_COMPS_Z | INST_COMPS_W);
1326 mad[0].src[0] = src[0];
1327 mad[0].src[1] = src[2];
1328 mad[0].src[2] = negate(src[2]);
1329 mad[1].opcode = INST_OPCODE_MAD;
1330 mad[1].sat = inst->Instruction.Saturate;
1331 mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1332 mad[1].src[1] = src[1];
1333 mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1334
1335 emit_inst(c, &mad[0]);
1336 emit_inst(c, &mad[1]);
1337 }
1338
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* Emitted sequence:
    * SELECT.LT tmp._y__, 0, src.yyyy, 0
    *  - can be eliminated if src.y is a uniform and >= 0
    * SELECT.GT tmp.___w, 128, src.wwww, 128
    * SELECT.LT tmp.___w, -128, tmp.wwww, -128
    *  - can be eliminated if src.w is a uniform and fits clamp
    * LOG tmp.x, void, void, tmp.yyyy
    * MUL tmp.x, tmp.xxxx, tmp.wwww, void
    * LITP dst, undef, src.xxxx, tmp.xxxx
    */
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* src_y: clamp src.y to >= 0 at runtime, or fold the clamp away when
    * src is a uniform whose value is known at compile time. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* src_w: clamp src.w into [-128, 128], again folded for uniforms. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse the template for the lower bound: flip sign of the 128
       * immediates and compare LT against the partially-clamped value */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* newer LOG writes x and y components whose product is the result */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = log2(src_y) * src_w */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1431
1432 static void
1433 trans_ssg(const struct instr_translater *t, struct etna_compile *c,
1434 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1435 {
1436 if (c->specs->has_sign_floor_ceil) {
1437 emit_inst(c, &(struct etna_inst){
1438 .opcode = INST_OPCODE_SIGN,
1439 .sat = inst->Instruction.Saturate,
1440 .dst = convert_dst(c, &inst->Dst[0]),
1441 .src[2] = src[0],
1442 });
1443 } else {
1444 struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1445 struct etna_inst ins[2] = { };
1446
1447 ins[0].opcode = INST_OPCODE_SET;
1448 ins[0].cond = INST_CONDITION_NZ;
1449 ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1450 INST_COMPS_Z | INST_COMPS_W);
1451 ins[0].src[0] = src[0];
1452
1453 ins[1].opcode = INST_OPCODE_SELECT;
1454 ins[1].cond = INST_CONDITION_LZ;
1455 ins[1].sat = inst->Instruction.Saturate;
1456 ins[1].dst = convert_dst(c, &inst->Dst[0]);
1457 ins[1].src[0] = src[0];
1458 ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
1459 ins[1].src[1] = negate(ins[1].src[2]);
1460
1461 emit_inst(c, &ins[0]);
1462 emit_inst(c, &ins[1]);
1463 }
1464 }
1465
/* SIN/COS/SCS: three strategies depending on hardware generation — new
 * transcendental unit, classic SIN/COS unit, or a polynomial emulation. */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      /* TGSI lowering should deal with SCS */
      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      /* temp.z = src * (1/pi) */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      /* dst = temp.x * temp.y */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      /* TGSI lowering should deal with SCS */
      assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                      ? INST_OPCODE_COS
                      : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       *  MAD t.x_zw, src.xxxx, A, B
       *  FRC t.x_z_, void, void, t.xwzw
       *  MAD t.x_z_, t.xwzw, 2, -1
       *  MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       *  DP3 t.x_z_, t.zyww, C, void         (for sin)
       *  DP3 t.__z_, t.zyww, C, void         (for scs)
       *  MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       *  DP3 t.x_z_, t.xyww, C, void         (for cos)
       *  DP3 t.x___, t.xyww, C, void         (for scs)
       *  MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
       *  MAD dst, t.ywyw, .2225, t.xzxz
       *
       * TODO: we don't set dst.zw correctly for SCS.
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded.  Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS read different lanes in the MUL/DP3 steps below */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
         /* SCS needs both results: duplicate the MUL/DP3 pair for the
          * second function and narrow the write masks */
         ins[5] = ins[3];
         ins[6] = ins[4];
         ins[4].dst.comps = INST_COMPS_X;
         ins[6].dst.comps = INST_COMPS_Z;
         ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
         ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
         ins[5].src[1] = absolute(ins[5].src[0]);
         p = &ins[7];
      } else {
         p = &ins[5];
      }

      /* final precision-improving MAD pair (the .2225 correction term) */
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      /* emit everything up to and including the instruction 'p' points at */
      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1631
1632 static void
1633 trans_lg2(const struct instr_translater *t, struct etna_compile *c,
1634 const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1635 {
1636 if (c->specs->has_new_transcendentals) {
1637 /* On newer chips alternative LOG instruction is implemented,
1638 * which outputs an x and y component, which need to be multiplied to
1639 * get the result.
1640 */
1641 struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
1642 emit_inst(c, &(struct etna_inst) {
1643 .opcode = INST_OPCODE_LOG,
1644 .sat = 0,
1645 .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
1646 .src[2] = src[0],
1647 .tex = { .amode=1 }, /* Unknown bit needs to be set */
1648 });
1649 emit_inst(c, &(struct etna_inst) {
1650 .opcode = INST_OPCODE_MUL,
1651 .sat = inst->Instruction.Saturate,
1652 .dst = convert_dst(c, &inst->Dst[0]),
1653 .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
1654 .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
1655 });
1656 } else {
1657 emit_inst(c, &(struct etna_inst) {
1658 .opcode = INST_OPCODE_LOG,
1659 .sat = inst->Instruction.Saturate,
1660 .dst = convert_dst(c, &inst->Dst[0]),
1661 .src[2] = src[0],
1662 });
1663 }
1664 }
1665
/* DPH (homogeneous dot product): dst = dot3(src0, src1) + src1.w */
static void
trans_dph(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /*
      DP3 tmp.xyzw, src0.xyzw, src1.xyzw, void
      ADD dst.xyzw, tmp.xyzw, void, src1.wwww
   */
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst ins[2] = { };

   ins[0].opcode = INST_OPCODE_DP3;
   ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                   INST_COMPS_Z | INST_COMPS_W);
   ins[0].src[0] = src[0];
   ins[0].src[1] = src[1];

   ins[1].opcode = INST_OPCODE_ADD;
   ins[1].sat = inst->Instruction.Saturate;
   ins[1].dst = convert_dst(c, &inst->Dst[0]);
   ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
   ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));

   emit_inst(c, &ins[0]);
   emit_inst(c, &ins[1]);
}
1692
/* Translate the texture-sampling opcodes (TEX/TXB/TXL/TXP) into native
 * TEXLD variants, handling rectangle-coordinate rescaling and the TXP
 * projective divide. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* temp.x/.y = coord * per-unit TEXRECT scale immediates (which are
       * presumably resolved to 1/width and 1/height — defined elsewhere) */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB: /* texture lookup with LOD bias */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL: /* texture lookup with explicit LOD */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* tmp.w = 1 / src.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      /* tmp.xyz = src.xyz * tmp.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      /* sample with the projected coordinates */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}
1785
/* Translation stub for opcodes (NOP, END) that emit no native code. */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* intentionally empty */
}
1792
/* Dispatch table indexed by TGSI opcode.  Entries not listed here have a
 * NULL ->fxn and are reported as unhandled by the code generation pass. */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   /* straightforward ALU ops handled by the generic trans_instr */
   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   /* control flow */
   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   /* opcodes needing multi-instruction expansions */
   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),
   INSTR(DPH, trans_dph),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),
   INSTR(SCS, trans_trig),

   /* comparisons: SET with the matching condition code */
   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   /* texture sampling */
   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};
1857
1858 /* Pass -- compile instructions */
1859 static void
1860 etna_compile_pass_generate_code(struct etna_compile *c)
1861 {
1862 struct tgsi_parse_context ctx = { };
1863 unsigned status = tgsi_parse_init(&ctx, c->tokens);
1864 assert(status == TGSI_PARSE_OK);
1865
1866 int inst_idx = 0;
1867 while (!tgsi_parse_end_of_tokens(&ctx)) {
1868 const struct tgsi_full_instruction *inst = 0;
1869
1870 /* No inner temps used yet for this instruction, clear counter */
1871 c->inner_temps = 0;
1872
1873 tgsi_parse_token(&ctx);
1874
1875 switch (ctx.FullToken.Token.Type) {
1876 case TGSI_TOKEN_TYPE_INSTRUCTION:
1877 /* iterate over operands */
1878 inst = &ctx.FullToken.FullInstruction;
1879 if (c->dead_inst[inst_idx]) { /* skip dead instructions */
1880 inst_idx++;
1881 continue;
1882 }
1883
1884 /* Lookup the TGSI information and generate the source arguments */
1885 struct etna_inst_src src[ETNA_NUM_SRC];
1886 memset(src, 0, sizeof(src));
1887
1888 const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
1889
1890 for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
1891 const struct tgsi_full_src_register *reg = &inst->Src[i];
1892 const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
1893
1894 if (!n->valid || n->is_tex)
1895 continue;
1896
1897 src[i] = etna_create_src(reg, n);
1898 }
1899
1900 const unsigned opc = inst->Instruction.Opcode;
1901 const struct instr_translater *t = &translaters[opc];
1902
1903 if (t->fxn) {
1904 t->fxn(t, c, inst, src);
1905
1906 inst_idx += 1;
1907 } else {
1908 BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
1909 assert(0);
1910 }
1911 break;
1912 }
1913 }
1914 tgsi_parse_free(&ctx);
1915 }
1916
1917 /* Look up register by semantic */
1918 static struct etna_reg_desc *
1919 find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1920 {
1921 for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1922 struct etna_reg_desc *reg = &c->file[file].reg[idx];
1923
1924 if (reg->semantic.Name == name && reg->semantic.Index == index)
1925 return reg;
1926 }
1927
1928 return NULL; /* not found */
1929 }
1930
/** Add ADD and MUL instructions to remap the position output's Z from the
 * -1..1 range to the 0..1 range, when needed:
 * - this is a vertex shader
 * - and this is an older GPU (specs->vs_need_z_div)
 */
static void
etna_compile_add_z_div_if_needed(struct etna_compile *c)
{
   if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
      /* find position out */
      struct etna_reg_desc *pos_reg =
         find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);

      if (pos_reg != NULL) {
         /*
          * z' = (z + w) * 0.5 :
          * ADD tX.__z_, tX.zzzz, void, tX.wwww
          * MUL tX.__z_, tX.zzzz, 0.5, void
          */
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_ADD,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
         });
         emit_inst(c, &(struct etna_inst) {
            .opcode = INST_OPCODE_MUL,
            .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
            .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
            .src[1] = alloc_imm_f32(c, 0.5f),
         });
      }
   }
}
1963
1964 static void
1965 etna_compile_frag_rb_swap(struct etna_compile *c)
1966 {
1967 if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1968 /* find color out */
1969 struct etna_reg_desc *color_reg =
1970 find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1971
1972 emit_inst(c, &(struct etna_inst) {
1973 .opcode = INST_OPCODE_MOV,
1974 .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1975 .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1976 });
1977 }
1978 }
1979
1980 /** add a NOP to the shader if
1981 * a) the shader is empty
1982 * or
1983 * b) there is a label at the end of the shader
1984 */
1985 static void
1986 etna_compile_add_nop_if_needed(struct etna_compile *c)
1987 {
1988 bool label_at_last_inst = false;
1989
1990 for (int idx = 0; idx < c->labels_count; ++idx) {
1991 if (c->labels[idx].inst_idx == c->inst_ptr)
1992 label_at_last_inst = true;
1993
1994 }
1995
1996 if (c->inst_ptr == 0 || label_at_last_inst)
1997 emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
1998 }
1999
2000 static void
2001 assign_uniforms(struct etna_compile_file *file, unsigned base)
2002 {
2003 for (int idx = 0; idx < file->reg_size; ++idx) {
2004 file->reg[idx].native.valid = 1;
2005 file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
2006 file->reg[idx].native.id = base + idx;
2007 }
2008 }
2009
/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
 * CONST registers must be consecutive, as constant buffers are expected to
 * be consecutive, and must come before IMM: that ordering makes it possible
 * for the compilation process itself to generate extra immediates for
 * constants such as pi, one and zero.
 */
static void
assign_constants_and_immediates(struct etna_compile *c)
{
   /* constants occupy uniform registers starting at 0 */
   assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
   /* immediates start after the constants */
   /* imm_base counts scalar components, 4 per uniform vector register */
   c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
   assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
   DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
         c->imm_size);
}
2027
2028 /* Assign declared samplers to native texture units */
2029 static void
2030 assign_texture_units(struct etna_compile *c)
2031 {
2032 uint tex_base = 0;
2033
2034 if (c->info.processor == PIPE_SHADER_VERTEX)
2035 tex_base = c->specs->vertex_sampler_offset;
2036
2037 for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
2038 c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
2039 c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
2040 c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
2041 }
2042 }
2043
2044 /* Additional pass to fill in branch targets. This pass should be last
2045 * as no instruction reordering or removing/addition can be done anymore
2046 * once the branch targets are computed.
2047 */
static void
etna_compile_fill_in_labels(struct etna_compile *c)
{
   /* Walk every emitted instruction. lbl_usage[idx] holds the index of the
    * label referenced by instruction idx, or -1 when the instruction
    * references no label. Instructions are 4 words long, hence the
    * idx * 4 addressing into the code array.
    */
   for (int idx = 0; idx < c->inst_ptr; ++idx) {
      if (c->lbl_usage[idx] != -1)
         etna_assemble_set_imm(&c->code[idx * 4],
                               c->labels[c->lbl_usage[idx]].inst_idx);
   }
}
2057
2058 /* compare two etna_native_reg structures, return true if equal */
2059 static bool
2060 cmp_etna_native_reg(const struct etna_native_reg to,
2061 const struct etna_native_reg from)
2062 {
2063 return to.valid == from.valid && to.is_tex == from.is_tex &&
2064 to.rgroup == from.rgroup && to.id == from.id;
2065 }
2066
2067 /* go through all declarations and swap native registers *to* and *from* */
2068 static void
2069 swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2070 const struct etna_native_reg from)
2071 {
2072 if (cmp_etna_native_reg(from, to))
2073 return; /* Nothing to do */
2074
2075 for (int idx = 0; idx < c->total_decls; ++idx) {
2076 if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2077 c->decl[idx].native = to;
2078 } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2079 c->decl[idx].native = from;
2080 }
2081 }
2082 }
2083
2084 /* For PS we need to permute so that inputs are always in temporary 0..N-1.
2085 * Semantic POS is always t0. If that semantic is not used, avoid t0.
2086 */
2087 static void
2088 permute_ps_inputs(struct etna_compile *c)
2089 {
2090 /* Special inputs:
2091 * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
2092 * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
2093 */
2094 uint native_idx = 1;
2095
2096 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2097 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2098 uint input_id;
2099 assert(reg->has_semantic);
2100
2101 if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
2102 continue;
2103
2104 input_id = native_idx++;
2105 swap_native_registers(c, etna_native_temp(input_id),
2106 c->file[TGSI_FILE_INPUT].reg[idx].native);
2107 }
2108
2109 c->num_varyings = native_idx - 1;
2110
2111 if (native_idx > c->next_free_native)
2112 c->next_free_native = native_idx;
2113 }
2114
2115 /* fill in ps inputs into shader object */
2116 static void
2117 fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2118 {
2119 struct etna_shader_io_file *sf = &sobj->infile;
2120
2121 sf->num_reg = 0;
2122
2123 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2124 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2125
2126 if (reg->native.id > 0) {
2127 assert(sf->num_reg < ETNA_NUM_INPUTS);
2128 sf->reg[sf->num_reg].reg = reg->native.id;
2129 sf->reg[sf->num_reg].semantic = reg->semantic;
2130 /* convert usage mask to number of components (*=wildcard)
2131 * .r (0..1) -> 1 component
2132 * .*g (2..3) -> 2 component
2133 * .**b (4..7) -> 3 components
2134 * .***a (8..15) -> 4 components
2135 */
2136 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2137 sf->num_reg++;
2138 }
2139 }
2140
2141 assert(sf->num_reg == c->num_varyings);
2142 sobj->input_count_unk8 = 31; /* XXX what is this */
2143 }
2144
2145 /* fill in output mapping for ps into shader object */
2146 static void
2147 fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2148 {
2149 sobj->outfile.num_reg = 0;
2150
2151 for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2152 struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2153
2154 switch (reg->semantic.Name) {
2155 case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2156 sobj->ps_color_out_reg = reg->native.id;
2157 break;
2158 case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2159 sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2160 break;
2161 default:
2162 assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2163 }
2164 }
2165 }
2166
2167 /* fill in inputs for vs into shader object */
2168 static void
2169 fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2170 {
2171 struct etna_shader_io_file *sf = &sobj->infile;
2172
2173 sf->num_reg = 0;
2174 for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2175 struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2176 assert(sf->num_reg < ETNA_NUM_INPUTS);
2177 /* XXX exclude inputs with special semantics such as gl_frontFacing */
2178 sf->reg[sf->num_reg].reg = reg->native.id;
2179 sf->reg[sf->num_reg].semantic = reg->semantic;
2180 sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2181 sf->num_reg++;
2182 }
2183
2184 sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2185 }
2186
2187 /* build two-level output index [Semantic][Index] for fast linking */
2188 static void
2189 build_output_index(struct etna_shader_variant *sobj)
2190 {
2191 int total = 0;
2192 int offset = 0;
2193
2194 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
2195 total += sobj->output_count_per_semantic[name];
2196
2197 sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
2198
2199 for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
2200 sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
2201 offset += sobj->output_count_per_semantic[name];
2202 }
2203
2204 for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
2205 sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
2206 [sobj->outfile.reg[idx].semantic.Index] =
2207 &sobj->outfile.reg[idx];
2208 }
2209 }
2210
2211 /* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      /* POSITION and PSIZE go to dedicated registers on the variant, not
       * into the generic outfile list */
      case TGSI_SEMANTIC_POSITION:
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].semantic = reg->semantic;
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
         /* track highest Index+1 seen per semantic, used by
          * build_output_index to size the per-semantic sub-arrays */
         sobj->output_count_per_semantic[reg->semantic.Name] =
            MAX2(reg->semantic.Index + 1,
                 sobj->output_count_per_semantic[reg->semantic.Name]);
      }
   }

   /* build two-level index for linking */
   build_output_index(sobj);

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   /* NOTE(review): formula appears reverse-engineered from the blob driver;
    * the exact meaning of the constants (20480, 256) is not established by
    * this code — do not "simplify" without hardware testing. */
   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}
2270
/* Verify the compiled shader fits within the hardware limits of this GPU.
 * Returns false (compilation fails) when an instruction, register, uniform
 * or varying limit is exceeded.
 */
static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   if (c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   /* NOTE: this check only logs and does NOT fail compilation — presumably
    * deliberate (constants past the limit may still work), but worth
    * confirming against hardware behavior. */
   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}
2311
2312 static void
2313 copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
2314 {
2315 uint32_t count = c->imm_size;
2316 struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
2317
2318 uinfo->const_count = c->imm_base;
2319 uinfo->imm_count = count;
2320 uinfo->imm_data = mem_dup(c->imm_data, count * sizeof(*c->imm_data));
2321 uinfo->imm_contents = mem_dup(c->imm_contents, count * sizeof(*c->imm_contents));
2322
2323 etna_set_shader_uniforms_dirty_flags(sobj);
2324 }
2325
2326 bool
2327 etna_compile_shader(struct etna_shader_variant *v)
2328 {
2329 /* Create scratch space that may be too large to fit on stack
2330 */
2331 bool ret;
2332 struct etna_compile *c;
2333
2334 if (unlikely(!v))
2335 return false;
2336
2337 const struct etna_specs *specs = v->shader->specs;
2338
2339 struct tgsi_lowering_config lconfig = {
2340 .lower_SCS = specs->has_sin_cos_sqrt,
2341 .lower_FLR = !specs->has_sign_floor_ceil,
2342 .lower_CEIL = !specs->has_sign_floor_ceil,
2343 .lower_POW = true,
2344 .lower_EXP = true,
2345 .lower_LOG = true,
2346 .lower_DP2 = true,
2347 .lower_DP2A = true,
2348 .lower_TRUNC = true,
2349 .lower_XPD = true
2350 };
2351
2352 c = CALLOC_STRUCT(etna_compile);
2353 if (!c)
2354 return false;
2355
2356 memset(&c->lbl_usage, -1, ARRAY_SIZE(c->lbl_usage));
2357
2358 const struct tgsi_token *tokens = v->shader->tokens;
2359
2360 c->specs = specs;
2361 c->key = &v->key;
2362 c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
2363 c->free_tokens = !!c->tokens;
2364 if (!c->tokens) {
2365 /* no lowering */
2366 c->tokens = tokens;
2367 }
2368
2369 /* Build a map from gallium register to native registers for files
2370 * CONST, SAMP, IMM, OUT, IN, TEMP.
2371 * SAMP will map as-is for fragment shaders, there will be a +8 offset for
2372 * vertex shaders.
2373 */
2374 /* Pass one -- check register file declarations and immediates */
2375 etna_compile_parse_declarations(c);
2376
2377 etna_allocate_decls(c);
2378
2379 /* Pass two -- check usage of temporaries, inputs, outputs */
2380 etna_compile_pass_check_usage(c);
2381
2382 assign_special_inputs(c);
2383
2384 /* Assign native temp register to TEMPs */
2385 assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
2386
2387 /* optimize outputs */
2388 etna_compile_pass_optimize_outputs(c);
2389
2390 /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
2391 * this is part of RGROUP_INTERNAL
2392 */
2393
2394 /* assign inputs: last usage of input should be <= first usage of temp */
2395 /* potential optimization case:
2396 * if single MOV TEMP[y], IN[x] before which temp y is not used, and
2397 * after which IN[x]
2398 * is not read, temp[y] can be used as input register as-is
2399 */
2400 /* sort temporaries by first use
2401 * sort inputs by last usage
2402 * iterate over inputs, temporaries
2403 * if last usage of input <= first usage of temp:
2404 * assign input to temp
2405 * advance input, temporary pointer
2406 * else
2407 * advance temporary pointer
2408 *
2409 * potential problem: instruction with multiple inputs of which one is the
2410 * temp and the other is the input;
2411 * however, as the temp is not used before this, how would this make
2412 * sense? uninitialized temporaries have an undefined
2413 * value, so this would be ok
2414 */
2415 assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
2416
2417 /* assign outputs: first usage of output should be >= last usage of temp */
2418 /* potential optimization case:
2419 * if single MOV OUT[x], TEMP[y] (with full write mask, or at least
2420 * writing all components that are used in
2421 * the shader) after which temp y is no longer used temp[y] can be
2422 * used as output register as-is
2423 *
2424 * potential problem: instruction with multiple outputs of which one is the
2425 * temp and the other is the output;
2426 * however, as the temp is not used after this, how would this make
2427 * sense? could just discard the output value
2428 */
2429 /* sort temporaries by last use
2430 * sort outputs by first usage
2431 * iterate over outputs, temporaries
2432 * if first usage of output >= last usage of temp:
2433 * assign output to temp
2434 * advance output, temporary pointer
2435 * else
2436 * advance temporary pointer
2437 */
2438 assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
2439
2440 assign_constants_and_immediates(c);
2441 assign_texture_units(c);
2442
2443 /* list declarations */
2444 for (int x = 0; x < c->total_decls; ++x) {
2445 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2446 "last_use=%i native=%i usage_mask=%x "
2447 "has_semantic=%i",
2448 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2449 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2450 c->decl[x].native.valid ? c->decl[x].native.id : -1,
2451 c->decl[x].usage_mask, c->decl[x].has_semantic);
2452 if (c->decl[x].has_semantic)
2453 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2454 tgsi_semantic_names[c->decl[x].semantic.Name],
2455 c->decl[x].semantic.Index);
2456 }
2457 /* XXX for PS we need to permute so that inputs are always in temporary
2458 * 0..N-1.
2459 * There is no "switchboard" for varyings (AFAIK!). The output color,
2460 * however, can be routed
2461 * from an arbitrary temporary.
2462 */
2463 if (c->info.processor == PIPE_SHADER_FRAGMENT)
2464 permute_ps_inputs(c);
2465
2466
2467 /* list declarations */
2468 for (int x = 0; x < c->total_decls; ++x) {
2469 DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
2470 "last_use=%i native=%i usage_mask=%x "
2471 "has_semantic=%i",
2472 x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
2473 c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
2474 c->decl[x].native.valid ? c->decl[x].native.id : -1,
2475 c->decl[x].usage_mask, c->decl[x].has_semantic);
2476 if (c->decl[x].has_semantic)
2477 DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
2478 tgsi_semantic_names[c->decl[x].semantic.Name],
2479 c->decl[x].semantic.Index);
2480 }
2481
2482 /* pass 3: generate instructions */
2483 etna_compile_pass_generate_code(c);
2484 etna_compile_add_z_div_if_needed(c);
2485 etna_compile_frag_rb_swap(c);
2486 etna_compile_add_nop_if_needed(c);
2487
2488 ret = etna_compile_check_limits(c);
2489 if (!ret)
2490 goto out;
2491
2492 etna_compile_fill_in_labels(c);
2493
2494 /* fill in output structure */
2495 v->processor = c->info.processor;
2496 v->code_size = c->inst_ptr * 4;
2497 v->code = mem_dup(c->code, c->inst_ptr * 16);
2498 v->num_loops = c->num_loops;
2499 v->num_temps = c->next_free_native;
2500 v->vs_pos_out_reg = -1;
2501 v->vs_pointsize_out_reg = -1;
2502 v->ps_color_out_reg = -1;
2503 v->ps_depth_out_reg = -1;
2504 copy_uniform_state_to_shader(c, v);
2505
2506 if (c->info.processor == PIPE_SHADER_VERTEX) {
2507 fill_in_vs_inputs(v, c);
2508 fill_in_vs_outputs(v, c);
2509 } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
2510 fill_in_ps_inputs(v, c);
2511 fill_in_ps_outputs(v, c);
2512 }
2513
2514 out:
2515 if (c->free_tokens)
2516 FREE((void *)c->tokens);
2517
2518 FREE(c->labels);
2519 FREE(c);
2520
2521 return ret;
2522 }
2523
2524 extern const char *tgsi_swizzle_names[];
2525 void
2526 etna_dump_shader(const struct etna_shader_variant *shader)
2527 {
2528 if (shader->processor == PIPE_SHADER_VERTEX)
2529 printf("VERT\n");
2530 else
2531 printf("FRAG\n");
2532
2533
2534 etna_disasm(shader->code, shader->code_size, PRINT_RAW);
2535
2536 printf("num loops: %i\n", shader->num_loops);
2537 printf("num temps: %i\n", shader->num_temps);
2538 printf("num const: %i\n", shader->uniforms.const_count);
2539 printf("immediates:\n");
2540 for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
2541 printf(" [%i].%s = %f (0x%08x)\n",
2542 (idx + shader->uniforms.const_count) / 4,
2543 tgsi_swizzle_names[idx % 4],
2544 *((float *)&shader->uniforms.imm_data[idx]),
2545 shader->uniforms.imm_data[idx]);
2546 }
2547 printf("inputs:\n");
2548 for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
2549 printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
2550 tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
2551 shader->infile.reg[idx].semantic.Index,
2552 shader->infile.reg[idx].num_components);
2553 }
2554 printf("outputs:\n");
2555 for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
2556 printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
2557 tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
2558 shader->outfile.reg[idx].semantic.Index,
2559 shader->outfile.reg[idx].num_components);
2560 }
2561 printf("special:\n");
2562 if (shader->processor == PIPE_SHADER_VERTEX) {
2563 printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
2564 printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
2565 printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
2566 } else {
2567 printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
2568 printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
2569 }
2570 printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
2571 }
2572
/* Free a shader variant and everything it owns (code, immediate copies and
 * the per-semantic output index allocated by build_output_index). */
void
etna_destroy_shader(struct etna_shader_variant *shader)
{
   assert(shader);

   FREE(shader->code);
   FREE(shader->uniforms.imm_data);
   FREE(shader->uniforms.imm_contents);
   FREE(shader->output_per_semantic_list);
   FREE(shader);
}
2584
2585 static const struct etna_shader_inout *
2586 etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2587 const struct etna_shader_inout *in)
2588 {
2589 if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
2590 return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
2591
2592 return NULL;
2593 }
2594
/* Link VS outputs to FS inputs, filling in info->varyings.
 * Returns true on LINK ERROR (a fragment input has no matching vertex
 * output), false on success — note the inverted convention. */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;

      /* FS input registers start at 1 (0 is position, see permute_ps_inputs),
       * so reg-1 indexes the varyings array */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      /* NOTE(review): 0x200/0x2f1 are raw PA attribute words taken from
       * hardware traces; their bit-level meaning is not established here */
      if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      /* point coordinates are generated by the rasterizer, not fed by a VS
       * output, so they need no matching vsio */
      if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
         varying->use[2] = VARYING_COMPONENT_USE_USED;
         varying->use[3] = VARYING_COMPONENT_USE_USED;
         varying->reg = 0; /* replaced by point coord -- doesn't matter */
         continue;
      }

      if (vsio == NULL)
         return true; /* not found -- link error */

      varying->use[0] = VARYING_COMPONENT_USE_USED;
      varying->use[1] = VARYING_COMPONENT_USE_USED;
      varying->use[2] = VARYING_COMPONENT_USE_USED;
      varying->use[3] = VARYING_COMPONENT_USE_USED;
      varying->reg = vsio->reg;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}