etnaviv_blt.h \
etnaviv_clear_blit.c \
etnaviv_clear_blit.h \
- etnaviv_compiler.c \
etnaviv_compiler.h \
etnaviv_compiler_nir.c \
etnaviv_compiler_nir_emit.h \
+ etnaviv_compiler_tgsi.c \
etnaviv_context.c \
etnaviv_context.h \
etnaviv_debug.h \
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Etnaviv Project
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sub license,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Wladimir J. van der Laan <laanwj@gmail.com>
- */
-
-/* TGSI->Vivante shader ISA conversion */
-
-/* What does the compiler return (see etna_shader_object)?
- * 1) instruction data
- * 2) input-to-temporary mapping (fixed for ps)
- * *) in case of ps, semantic -> varying id mapping
- * *) for each varying: number of components used (r, rg, rgb, rgba)
- * 3) temporary-to-output mapping (in case of vs, fixed for ps)
- * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
- * 5) immediates base offset, immediates data
- * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
- * configure the hw, but useful for error checking
- * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
- * (output reg id is enough)
- *
- * Empty shaders are not allowed, should always at least generate a NOP. Also
- * if there is a label at the end of the shader, an extra NOP should be
- * generated as jump target.
- *
- * TODO
- * * Use an instruction scheduler
- * * Indirect access to uniforms / temporaries using amode
- */
-
-#include "etnaviv_compiler.h"
-
-#include "etnaviv_asm.h"
-#include "etnaviv_context.h"
-#include "etnaviv_debug.h"
-#include "etnaviv_disasm.h"
-#include "etnaviv_uniforms.h"
-#include "etnaviv_util.h"
-
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_iterate.h"
-#include "tgsi/tgsi_lowering.h"
-#include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_util.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#define ETNA_MAX_INNER_TEMPS 2
-
-static const float sincos_const[2][4] = {
- {
- 2., -1., 4., -4.,
- },
- {
- 1. / (2. * M_PI), 0.75, 0.5, 0.0,
- },
-};
-
-/* Native register description structure */
-struct etna_native_reg {
- unsigned valid : 1;
- unsigned is_tex : 1; /* is texture unit, overrides rgroup */
- unsigned rgroup : 3;
- unsigned id : 9;
-};
-
-/* Register description */
-struct etna_reg_desc {
- enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
- int idx; /* index into file */
- bool active; /* used in program */
- int first_use; /* instruction id of first use (scope begin) */
- int last_use; /* instruction id of last use (scope end, inclusive) */
-
- struct etna_native_reg native; /* native register to map to */
- unsigned usage_mask : 4; /* usage, per channel */
- bool has_semantic; /* register has associated TGSI semantic */
- struct tgsi_declaration_semantic semantic; /* TGSI semantic */
- struct tgsi_declaration_interp interp; /* Interpolation type */
-};
-
-/* Label information structure */
-struct etna_compile_label {
- int inst_idx; /* Instruction id that label points to */
-};
-
-enum etna_compile_frame_type {
- ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
- ETNA_COMPILE_FRAME_LOOP,
-};
-
-/* nesting scope frame (LOOP, IF, ...) during compilation
- */
-struct etna_compile_frame {
- enum etna_compile_frame_type type;
- int lbl_else_idx;
- int lbl_endif_idx;
- int lbl_loop_bgn_idx;
- int lbl_loop_end_idx;
-};
-
-struct etna_compile_file {
- /* Number of registers in each TGSI file (max register+1) */
- size_t reg_size;
- /* Register descriptions, per register index */
- struct etna_reg_desc *reg;
-};
-
-#define array_insert(arr, val) \
- do { \
- if (arr##_count == arr##_sz) { \
- arr##_sz = MAX2(2 * arr##_sz, 16); \
- arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
- } \
- arr[arr##_count++] = val; \
- } while (0)
-
-
-/* scratch area for compiling shader, freed after compilation finishes */
-struct etna_compile {
- const struct tgsi_token *tokens;
- bool free_tokens;
-
- struct tgsi_shader_info info;
-
- /* Register descriptions, per TGSI file, per register index */
- struct etna_compile_file file[TGSI_FILE_COUNT];
-
- /* Keep track of TGSI register declarations */
- struct etna_reg_desc decl[ETNA_MAX_DECL];
- uint total_decls;
-
- /* Bitmap of dead instructions which are removed in a separate pass */
- bool dead_inst[ETNA_MAX_TOKENS];
-
- /* Immediate data */
- enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
- uint32_t imm_data[ETNA_MAX_IMM];
- uint32_t imm_base; /* base of immediates (in 32 bit units) */
- uint32_t imm_size; /* size of immediates (in 32 bit units) */
-
- /* Next free native register, for register allocation */
- uint32_t next_free_native;
-
- /* Temporary register for use within translated TGSI instruction,
- * only allocated when needed.
- */
- int inner_temps; /* number of inner temps used; only up to one available at
- this point */
- struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
-
- /* Fields for handling nested conditionals */
- struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
- int frame_sp;
- int lbl_usage[ETNA_MAX_INSTRUCTIONS];
-
- unsigned labels_count, labels_sz;
- struct etna_compile_label *labels;
-
- unsigned num_loops;
-
- /* Code generation */
- int inst_ptr; /* current instruction pointer */
- uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
-
- /* I/O */
-
- /* Number of varyings (PS only) */
- int num_varyings;
-
- /* GPU hardware specs */
- const struct etna_specs *specs;
-
- const struct etna_shader_key *key;
-};
-
-static struct etna_reg_desc *
-etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
-{
- return &c->file[dst.File].reg[dst.Index];
-}
-
-static struct etna_reg_desc *
-etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
-{
- return &c->file[src.File].reg[src.Index];
-}
-
-static struct etna_native_reg
-etna_native_temp(unsigned reg)
-{
- return (struct etna_native_reg) {
- .valid = 1,
- .rgroup = INST_RGROUP_TEMP,
- .id = reg
- };
-}
-
-/** Register allocation **/
-enum reg_sort_order {
- FIRST_USE_ASC,
- FIRST_USE_DESC,
- LAST_USE_ASC,
- LAST_USE_DESC
-};
-
-/* Augmented register description for sorting */
-struct sort_rec {
- struct etna_reg_desc *ptr;
- int key;
-};
-
-static int
-sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
-{
- if (a->key < b->key)
- return -1;
-
- if (a->key > b->key)
- return 1;
-
- return 0;
-}
-
-/* create an index on a register set based on certain criteria. */
-static int
-sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
- enum reg_sort_order so)
-{
- struct etna_reg_desc *regs = file->reg;
- int ptr = 0;
-
- /* pre-populate keys from active registers */
- for (int idx = 0; idx < file->reg_size; ++idx) {
- /* only interested in active registers now; will only assign inactive ones
- * if no space in active ones */
- if (regs[idx].active) {
- sorted[ptr].ptr = ®s[idx];
-
- switch (so) {
- case FIRST_USE_ASC:
- sorted[ptr].key = regs[idx].first_use;
- break;
- case LAST_USE_ASC:
- sorted[ptr].key = regs[idx].last_use;
- break;
- case FIRST_USE_DESC:
- sorted[ptr].key = -regs[idx].first_use;
- break;
- case LAST_USE_DESC:
- sorted[ptr].key = -regs[idx].last_use;
- break;
- }
- ptr++;
- }
- }
-
- /* sort index by key */
- qsort(sorted, ptr, sizeof(struct sort_rec),
- (int (*)(const void *, const void *))sort_rec_compar);
-
- return ptr;
-}
-
-/* Allocate a new, unused, native temp register */
-static struct etna_native_reg
-alloc_new_native_reg(struct etna_compile *c)
-{
- assert(c->next_free_native < ETNA_MAX_TEMPS);
- return etna_native_temp(c->next_free_native++);
-}
-
-/* assign TEMPs to native registers */
-static void
-assign_temporaries_to_native(struct etna_compile *c,
- struct etna_compile_file *file)
-{
- struct etna_reg_desc *temps = file->reg;
-
- for (int idx = 0; idx < file->reg_size; ++idx)
- temps[idx].native = alloc_new_native_reg(c);
-}
-
-/* assign inputs and outputs to temporaries
- * Gallium assumes that the hardware has separate registers for taking input and
- * output, however Vivante GPUs use temporaries both for passing in inputs and
- * passing back outputs.
- * Try to re-use temporary registers where possible. */
-static void
-assign_inouts_to_temporaries(struct etna_compile *c, uint file)
-{
- bool mode_inputs = (file == TGSI_FILE_INPUT);
- int inout_ptr = 0, num_inouts;
- int temp_ptr = 0, num_temps;
- struct sort_rec inout_order[ETNA_MAX_TEMPS];
- struct sort_rec temps_order[ETNA_MAX_TEMPS];
- num_inouts = sort_registers(inout_order, &c->file[file],
- mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
- num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
- mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
-
- while (inout_ptr < num_inouts && temp_ptr < num_temps) {
- struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
- struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
-
- if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
- inout_ptr++;
- continue;
- }
-
- /* last usage of this input is before or in same instruction of first use
- * of temporary? */
- if (mode_inputs ? (inout->last_use <= temp->first_use)
- : (inout->first_use >= temp->last_use)) {
- /* assign it and advance to next input */
- inout->native = temp->native;
- inout_ptr++;
- }
-
- temp_ptr++;
- }
-
- /* if we couldn't reuse current ones, allocate new temporaries */
- for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
- struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
-
- if (inout->active && !inout->native.valid)
- inout->native = alloc_new_native_reg(c);
- }
-}
-
-/* Allocate an immediate with a certain value and return the index. If
- * there is already an immediate with that value, return that.
- */
-static struct etna_inst_src
-alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
- uint32_t value)
-{
- int idx;
-
- /* Could use a hash table to speed this up */
- for (idx = 0; idx < c->imm_size; ++idx) {
- if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
- break;
- }
-
- /* look if there is an unused slot */
- if (idx == c->imm_size) {
- for (idx = 0; idx < c->imm_size; ++idx) {
- if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
- break;
- }
- }
-
- /* allocate new immediate */
- if (idx == c->imm_size) {
- assert(c->imm_size < ETNA_MAX_IMM);
- idx = c->imm_size++;
- c->imm_data[idx] = value;
- c->imm_contents[idx] = contents;
- }
-
- /* swizzle so that component with value is returned in all components */
- idx += c->imm_base;
- struct etna_inst_src imm_src = {
- .use = 1,
- .rgroup = INST_RGROUP_UNIFORM_0,
- .reg = idx / 4,
- .swiz = INST_SWIZ_BROADCAST(idx & 3)
- };
-
- return imm_src;
-}
-
-static struct etna_inst_src
-alloc_imm_u32(struct etna_compile *c, uint32_t value)
-{
- return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
-}
-
-static struct etna_inst_src
-alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
- const uint32_t *values)
-{
- struct etna_inst_src imm_src = { };
- int idx, i;
-
- for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
- /* What if we can use a uniform with a different swizzle? */
- for (i = 0; i < 4; i++)
- if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
- break;
- if (i == 4)
- break;
- }
-
- if (idx + 3 >= c->imm_size) {
- idx = align(c->imm_size, 4);
- assert(idx + 4 <= ETNA_MAX_IMM);
-
- for (i = 0; i < 4; i++) {
- c->imm_data[idx + i] = values[i];
- c->imm_contents[idx + i] = contents;
- }
-
- c->imm_size = idx + 4;
- }
-
- assert((c->imm_base & 3) == 0);
- idx += c->imm_base;
- imm_src.use = 1;
- imm_src.rgroup = INST_RGROUP_UNIFORM_0;
- imm_src.reg = idx / 4;
- imm_src.swiz = INST_SWIZ_IDENTITY;
-
- return imm_src;
-}
-
-static uint32_t
-get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
- unsigned swiz_idx)
-{
- assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
- unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
-
- return c->imm_data[idx];
-}
-
-/* Allocate immediate with a certain float value. If there is already an
- * immediate with that value, return that.
- */
-static struct etna_inst_src
-alloc_imm_f32(struct etna_compile *c, float value)
-{
- return alloc_imm_u32(c, fui(value));
-}
-
-static struct etna_inst_src
-etna_imm_vec4f(struct etna_compile *c, const float *vec4)
-{
- uint32_t val[4];
-
- for (int i = 0; i < 4; i++)
- val[i] = fui(vec4[i]);
-
- return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
-}
-
-/* Pass -- check register file declarations and immediates */
-static void
-etna_compile_parse_declarations(struct etna_compile *c)
-{
- struct tgsi_parse_context ctx = { };
- ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
- assert(status == TGSI_PARSE_OK);
-
- while (!tgsi_parse_end_of_tokens(&ctx)) {
- tgsi_parse_token(&ctx);
-
- switch (ctx.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE: {
- /* immediates are handled differently from other files; they are
- * not declared explicitly, and always add four components */
- const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
- assert(c->imm_size <= (ETNA_MAX_IMM - 4));
-
- for (int i = 0; i < 4; ++i) {
- unsigned idx = c->imm_size++;
-
- c->imm_data[idx] = imm->u[i].Uint;
- c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
- }
- }
- break;
- }
- }
-
- tgsi_parse_free(&ctx);
-}
-
-/* Allocate register declarations for the registers in all register files */
-static void
-etna_allocate_decls(struct etna_compile *c)
-{
- uint idx = 0;
-
- for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
- c->file[x].reg = &c->decl[idx];
- c->file[x].reg_size = c->info.file_max[x] + 1;
-
- for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
- c->decl[idx].file = x;
- c->decl[idx].idx = sub;
- idx++;
- }
- }
-
- c->total_decls = idx;
-}
-
-/* Pass -- check and record usage of temporaries, inputs, outputs */
-static void
-etna_compile_pass_check_usage(struct etna_compile *c)
-{
- struct tgsi_parse_context ctx = { };
- ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
- assert(status == TGSI_PARSE_OK);
-
- for (int idx = 0; idx < c->total_decls; ++idx) {
- c->decl[idx].active = false;
- c->decl[idx].first_use = c->decl[idx].last_use = -1;
- }
-
- int inst_idx = 0;
- while (!tgsi_parse_end_of_tokens(&ctx)) {
- tgsi_parse_token(&ctx);
- /* find out max register #s used
- * For every register mark first and last instruction index where it's
- * used this allows finding ranges where the temporary can be borrowed
- * as input and/or output register
- *
- * XXX in the case of loops this needs special care, or even be completely
- * disabled, as
- * the last usage of a register inside a loop means it can still be used
- * on next loop
- * iteration (execution is no longer * chronological). The register can
- * only be
- * declared "free" after the loop finishes.
- *
- * Same for inputs: the first usage of a register inside a loop doesn't
- * mean that the register
- * won't have been overwritten in previous iteration. The register can
- * only be declared free before the loop
- * starts.
- * The proper way would be to do full dominator / post-dominator analysis
- * (especially with more complicated
- * control flow such as direct branch instructions) but not for now...
- */
- switch (ctx.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION: {
- /* Declaration: fill in file details */
- const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
- struct etna_compile_file *file = &c->file[decl->Declaration.File];
-
- for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
- file->reg[idx].usage_mask = 0; // we'll compute this ourselves
- file->reg[idx].has_semantic = decl->Declaration.Semantic;
- file->reg[idx].semantic = decl->Semantic;
- file->reg[idx].interp = decl->Interp;
- }
- } break;
- case TGSI_TOKEN_TYPE_INSTRUCTION: {
- /* Instruction: iterate over operands of instruction */
- const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
-
- /* iterate over destination registers */
- for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
- struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
-
- if (reg_desc->first_use == -1)
- reg_desc->first_use = inst_idx;
-
- reg_desc->last_use = inst_idx;
- reg_desc->active = true;
- }
-
- /* iterate over source registers */
- for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
- struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
-
- if (reg_desc->first_use == -1)
- reg_desc->first_use = inst_idx;
-
- reg_desc->last_use = inst_idx;
- reg_desc->active = true;
- /* accumulate usage mask for register, this is used to determine how
- * many slots for varyings
- * should be allocated */
- reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
- }
- inst_idx += 1;
- } break;
- default:
- break;
- }
- }
-
- tgsi_parse_free(&ctx);
-}
-
-/* assign inputs that need to be assigned to specific registers */
-static void
-assign_special_inputs(struct etna_compile *c)
-{
- if (c->info.processor == PIPE_SHADER_FRAGMENT) {
- /* never assign t0 as it is the position output, start assigning at t1 */
- c->next_free_native = 1;
-
- /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
- for (int idx = 0; idx < c->total_decls; ++idx) {
- struct etna_reg_desc *reg = &c->decl[idx];
-
- if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
- reg->native = etna_native_temp(0);
- }
- }
-}
-
-/* Check that a move instruction does not swizzle any of the components
- * that it writes.
- */
-static bool
-etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
- const struct tgsi_src_register src)
-{
- return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
- (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
- (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
- (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
-}
-
-/* Pass -- optimize outputs
- * Mesa tends to generate code like this at the end if their shaders
- * MOV OUT[1], TEMP[2]
- * MOV OUT[0], TEMP[0]
- * MOV OUT[2], TEMP[1]
- * Recognize if
- * a) there is only a single assignment to an output register and
- * b) the temporary is not used after that
- * Also recognize direct assignment of IN to OUT (passthrough)
- **/
-static void
-etna_compile_pass_optimize_outputs(struct etna_compile *c)
-{
- struct tgsi_parse_context ctx = { };
- int inst_idx = 0;
- ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
- assert(status == TGSI_PARSE_OK);
-
- while (!tgsi_parse_end_of_tokens(&ctx)) {
- tgsi_parse_token(&ctx);
-
- switch (ctx.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_INSTRUCTION: {
- const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
-
- /* iterate over operands */
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_MOV: {
- /* We are only interested in eliminating MOVs which write to
- * the shader outputs. Test for this early. */
- if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
- break;
- /* Elimination of a MOV must have no visible effect on the
- * resulting shader: this means the MOV must not swizzle or
- * saturate, and its source must not have the negate or
- * absolute modifiers. */
- if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
- inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
- inst->Src[0].Register.Absolute)
- break;
-
- uint out_idx = inst->Dst[0].Register.Index;
- uint in_idx = inst->Src[0].Register.Index;
- /* assignment of temporary to output --
- * and the output doesn't yet have a native register assigned
- * and the last use of the temporary is this instruction
- * and the MOV does not do a swizzle
- */
- if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
- !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
- c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
- c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
- c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
- /* prevent temp from being re-used for the rest of the shader */
- c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
- /* mark this MOV instruction as a no-op */
- c->dead_inst[inst_idx] = true;
- }
- /* direct assignment of input to output --
- * and the input or output doesn't yet have a native register
- * assigned
- * and the output is only used in this instruction,
- * allocate a new register, and associate both input and output to
- * it
- * and the MOV does not do a swizzle
- */
- if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
- !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
- !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
- c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
- c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
- c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
- c->file[TGSI_FILE_INPUT].reg[in_idx].native =
- alloc_new_native_reg(c);
- /* mark this MOV instruction as a no-op */
- c->dead_inst[inst_idx] = true;
- }
- } break;
- default:;
- }
- inst_idx += 1;
- } break;
- }
- }
-
- tgsi_parse_free(&ctx);
-}
-
-/* Get a temporary to be used within one TGSI instruction.
- * The first time that this function is called the temporary will be allocated.
- * Each call to this function will return the same temporary.
- */
-static struct etna_native_reg
-etna_compile_get_inner_temp(struct etna_compile *c)
-{
- int inner_temp = c->inner_temps;
-
- if (inner_temp < ETNA_MAX_INNER_TEMPS) {
- if (!c->inner_temp[inner_temp].valid)
- c->inner_temp[inner_temp] = alloc_new_native_reg(c);
-
- /* alloc_new_native_reg() handles lack of registers */
- c->inner_temps += 1;
- } else {
- BUG("Too many inner temporaries (%i) requested in one instruction",
- inner_temp + 1);
- }
-
- return c->inner_temp[inner_temp];
-}
-
-static struct etna_inst_dst
-etna_native_to_dst(struct etna_native_reg native, unsigned comps)
-{
- /* Can only assign to temporaries */
- assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
-
- struct etna_inst_dst rv = {
- .write_mask = comps,
- .use = 1,
- .reg = native.id,
- };
-
- return rv;
-}
-
-static struct etna_inst_src
-etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
-{
- assert(native.valid && !native.is_tex);
-
- struct etna_inst_src rv = {
- .use = 1,
- .swiz = swizzle,
- .rgroup = native.rgroup,
- .reg = native.id,
- .amode = INST_AMODE_DIRECT,
- };
-
- return rv;
-}
-
-static inline struct etna_inst_src
-negate(struct etna_inst_src src)
-{
- src.neg = !src.neg;
-
- return src;
-}
-
-static inline struct etna_inst_src
-absolute(struct etna_inst_src src)
-{
- src.abs = 1;
-
- return src;
-}
-
-static inline struct etna_inst_src
-swizzle(struct etna_inst_src src, unsigned swizzle)
-{
- src.swiz = inst_swiz_compose(src.swiz, swizzle);
-
- return src;
-}
-
-/* Emit instruction and append it to program */
-static void
-emit_inst(struct etna_compile *c, struct etna_inst *inst)
-{
- assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
-
- /* Check for uniform conflicts (each instruction can only access one
- * uniform),
- * if detected, use an intermediate temporary */
- unsigned uni_rgroup = -1;
- unsigned uni_reg = -1;
-
- for (int src = 0; src < ETNA_NUM_SRC; ++src) {
- if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
- if (uni_reg == -1) { /* first unique uniform used */
- uni_rgroup = inst->src[src].rgroup;
- uni_reg = inst->src[src].reg;
- } else { /* second or later; check that it is a re-use */
- if (uni_rgroup != inst->src[src].rgroup ||
- uni_reg != inst->src[src].reg) {
- DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
- "accesses different uniforms, "
- "need to generate extra MOV");
- struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
-
- /* Generate move instruction to temporary */
- etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
- .opcode = INST_OPCODE_MOV,
- .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W),
- .src[2] = inst->src[src]
- });
-
- c->inst_ptr++;
-
- /* Modify instruction to use temp register instead of uniform */
- inst->src[src].use = 1;
- inst->src[src].rgroup = INST_RGROUP_TEMP;
- inst->src[src].reg = inner_temp.id;
- inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
- inst->src[src].neg = 0; /* negation happens on MOV */
- inst->src[src].abs = 0; /* abs happens on MOV */
- inst->src[src].amode = 0; /* amode effects happen on MOV */
- }
- }
- }
- }
-
- /* Finally assemble the actual instruction */
- etna_assemble(&c->code[c->inst_ptr * 4], inst);
- c->inst_ptr++;
-}
-
-static unsigned int
-etna_amode(struct tgsi_ind_register indirect)
-{
- assert(indirect.File == TGSI_FILE_ADDRESS);
- assert(indirect.Index == 0);
-
- switch (indirect.Swizzle) {
- case TGSI_SWIZZLE_X:
- return INST_AMODE_ADD_A_X;
- case TGSI_SWIZZLE_Y:
- return INST_AMODE_ADD_A_Y;
- case TGSI_SWIZZLE_Z:
- return INST_AMODE_ADD_A_Z;
- case TGSI_SWIZZLE_W:
- return INST_AMODE_ADD_A_W;
- default:
- assert(!"Invalid swizzle");
- }
-
- unreachable("bad swizzle");
-}
-
-/* convert destination operand */
-static struct etna_inst_dst
-convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
-{
- struct etna_inst_dst rv = {
- /// XXX .amode
- .write_mask = in->Register.WriteMask,
- };
-
- if (in->Register.File == TGSI_FILE_ADDRESS) {
- assert(in->Register.Index == 0);
- rv.reg = in->Register.Index;
- rv.use = 0;
- } else {
- rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
- in->Register.WriteMask);
- }
-
- if (in->Register.Indirect)
- rv.amode = etna_amode(in->Indirect);
-
- return rv;
-}
-
-/* convert texture operand */
-static struct etna_inst_tex
-convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
- const struct tgsi_instruction_texture *tex)
-{
- struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
- struct etna_inst_tex rv = {
- // XXX .amode (to allow for an array of samplers?)
- .swiz = INST_SWIZ_IDENTITY
- };
-
- assert(native_reg.is_tex && native_reg.valid);
- rv.id = native_reg.id;
-
- return rv;
-}
-
-/* convert source operand */
-static struct etna_inst_src
-etna_create_src(const struct tgsi_full_src_register *tgsi,
- const struct etna_native_reg *native)
-{
- const struct tgsi_src_register *reg = &tgsi->Register;
- struct etna_inst_src rv = {
- .use = 1,
- .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
- .neg = reg->Negate,
- .abs = reg->Absolute,
- .rgroup = native->rgroup,
- .reg = native->id,
- .amode = INST_AMODE_DIRECT,
- };
-
- assert(native->valid && !native->is_tex);
-
- if (reg->Indirect)
- rv.amode = etna_amode(tgsi->Indirect);
-
- return rv;
-}
-
-static struct etna_inst_src
-etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
- struct etna_native_reg temp)
-{
- struct etna_inst mov = { };
-
- mov.opcode = INST_OPCODE_MOV;
- mov.sat = 0;
- mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W);
- mov.src[2] = src;
- emit_inst(c, &mov);
-
- src.swiz = INST_SWIZ_IDENTITY;
- src.neg = src.abs = 0;
- src.rgroup = temp.rgroup;
- src.reg = temp.id;
-
- return src;
-}
-
-static struct etna_inst_src
-etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
-{
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
-
- return etna_mov_src_to_temp(c, src, temp);
-}
-
-static bool
-etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
-{
- return etna_rgroup_is_uniform(a.rgroup) &&
- etna_rgroup_is_uniform(b.rgroup) &&
- (a.rgroup != b.rgroup || a.reg != b.reg);
-}
-
-/* create a new label */
-static unsigned int
-alloc_new_label(struct etna_compile *c)
-{
- struct etna_compile_label label = {
- .inst_idx = -1, /* start by point to no specific instruction */
- };
-
- array_insert(c->labels, label);
-
- return c->labels_count - 1;
-}
-
-/* place label at current instruction pointer */
-static void
-label_place(struct etna_compile *c, struct etna_compile_label *label)
-{
- label->inst_idx = c->inst_ptr;
-}
-
-/* mark label use at current instruction.
- * target of the label will be filled in in the marked instruction's src2.imm
- * slot as soon
- * as the value becomes known.
- */
-static void
-label_mark_use(struct etna_compile *c, int lbl_idx)
-{
- assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
- c->lbl_usage[c->inst_ptr] = lbl_idx;
-}
-
-/* walk the frame stack and return first frame with matching type */
-static struct etna_compile_frame *
-find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
-{
- for (int sp = c->frame_sp; sp >= 0; sp--)
- if (c->frame_stack[sp].type == type)
- return &c->frame_stack[sp];
-
- assert(0);
- return NULL;
-}
-
-struct instr_translater {
- void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst,
- struct etna_inst_src *src);
- unsigned tgsi_opc;
- uint8_t opc;
-
- /* tgsi src -> etna src swizzle */
- int src[3];
-
- unsigned cond;
-};
-
-static void
-trans_instr(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
- struct etna_inst instr = { };
-
- instr.opcode = t->opc;
- instr.cond = t->cond;
- instr.sat = inst->Instruction.Saturate;
-
- assert(info->num_dst <= 1);
- if (info->num_dst)
- instr.dst = convert_dst(c, &inst->Dst[0]);
-
- assert(info->num_src <= ETNA_NUM_SRC);
-
- for (unsigned i = 0; i < info->num_src; i++) {
- int swizzle = t->src[i];
-
- assert(swizzle != -1);
- instr.src[swizzle] = src[i];
- }
-
- emit_inst(c, &instr);
-}
-
-static void
-trans_min_max(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst,
- struct etna_inst_src *src)
-{
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_SELECT,
- .cond = t->cond,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[0] = src[0],
- .src[1] = src[1],
- .src[2] = src[0],
- });
-}
-
-static void
-trans_if(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
- struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
-
- /* push IF to stack */
- f->type = ETNA_COMPILE_FRAME_IF;
- /* create "else" label */
- f->lbl_else_idx = alloc_new_label(c);
- f->lbl_endif_idx = -1;
-
- /* We need to avoid the emit_inst() below becoming two instructions */
- if (etna_src_uniforms_conflict(src[0], imm_0))
- src[0] = etna_mov_src(c, src[0]);
-
- /* mark position in instruction stream of label reference so that it can be
- * filled in in next pass */
- label_mark_use(c, f->lbl_else_idx);
-
- /* create conditional branch to label if src0 EQ 0 */
- emit_inst(c, &(struct etna_inst){
- .opcode = INST_OPCODE_BRANCH,
- .cond = INST_CONDITION_EQ,
- .src[0] = src[0],
- .src[1] = imm_0,
- /* imm is filled in later */
- });
-}
-
-static void
-trans_else(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- assert(c->frame_sp > 0);
- struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
- assert(f->type == ETNA_COMPILE_FRAME_IF);
-
- /* create "endif" label, and branch to endif label */
- f->lbl_endif_idx = alloc_new_label(c);
- label_mark_use(c, f->lbl_endif_idx);
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_BRANCH,
- .cond = INST_CONDITION_TRUE,
- /* imm is filled in later */
- });
-
- /* mark "else" label at this position in instruction stream */
- label_place(c, &c->labels[f->lbl_else_idx]);
-}
-
-static void
-trans_endif(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- assert(c->frame_sp > 0);
- struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
- assert(f->type == ETNA_COMPILE_FRAME_IF);
-
- /* assign "endif" or "else" (if no ELSE) label to current position in
- * instruction stream, pop IF */
- if (f->lbl_endif_idx != -1)
- label_place(c, &c->labels[f->lbl_endif_idx]);
- else
- label_place(c, &c->labels[f->lbl_else_idx]);
-}
-
-static void
-trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst,
- struct etna_inst_src *src)
-{
- struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
-
- /* push LOOP to stack */
- f->type = ETNA_COMPILE_FRAME_LOOP;
- f->lbl_loop_bgn_idx = alloc_new_label(c);
- f->lbl_loop_end_idx = alloc_new_label(c);
-
- label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
-
- c->num_loops++;
-}
-
-static void
-trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst,
- struct etna_inst_src *src)
-{
- assert(c->frame_sp > 0);
- struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
- assert(f->type == ETNA_COMPILE_FRAME_LOOP);
-
- /* mark position in instruction stream of label reference so that it can be
- * filled in in next pass */
- label_mark_use(c, f->lbl_loop_bgn_idx);
-
- /* create branch to loop_bgn label */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_BRANCH,
- .cond = INST_CONDITION_TRUE,
- .src[0] = src[0],
- /* imm is filled in later */
- });
-
- label_place(c, &c->labels[f->lbl_loop_end_idx]);
-}
-
-static void
-trans_brk(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- assert(c->frame_sp > 0);
- struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
-
- /* mark position in instruction stream of label reference so that it can be
- * filled in in next pass */
- label_mark_use(c, f->lbl_loop_end_idx);
-
- /* create branch to loop_end label */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_BRANCH,
- .cond = INST_CONDITION_TRUE,
- .src[0] = src[0],
- /* imm is filled in later */
- });
-}
-
-static void
-trans_cont(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- assert(c->frame_sp > 0);
- struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
-
- /* mark position in instruction stream of label reference so that it can be
- * filled in in next pass */
- label_mark_use(c, f->lbl_loop_bgn_idx);
-
- /* create branch to loop_end label */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_BRANCH,
- .cond = INST_CONDITION_TRUE,
- .src[0] = src[0],
- /* imm is filled in later */
- });
-}
-
-static void
-trans_deriv(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- emit_inst(c, &(struct etna_inst) {
- .opcode = t->opc,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[0] = src[0],
- .src[2] = src[0],
- });
-}
-
-static void
-trans_arl(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- struct etna_inst arl = { };
- struct etna_inst_dst dst;
-
- dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
- INST_COMPS_W);
-
- if (c->specs->has_sign_floor_ceil) {
- struct etna_inst floor = { };
-
- floor.opcode = INST_OPCODE_FLOOR;
- floor.src[2] = src[0];
- floor.dst = dst;
-
- emit_inst(c, &floor);
- } else {
- struct etna_inst floor[2] = { };
-
- floor[0].opcode = INST_OPCODE_FRC;
- floor[0].sat = inst->Instruction.Saturate;
- floor[0].dst = dst;
- floor[0].src[2] = src[0];
-
- floor[1].opcode = INST_OPCODE_ADD;
- floor[1].sat = inst->Instruction.Saturate;
- floor[1].dst = dst;
- floor[1].src[0] = src[0];
- floor[1].src[2].use = 1;
- floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
- floor[1].src[2].neg = 1;
- floor[1].src[2].rgroup = temp.rgroup;
- floor[1].src[2].reg = temp.id;
-
- emit_inst(c, &floor[0]);
- emit_inst(c, &floor[1]);
- }
-
- arl.opcode = INST_OPCODE_MOVAR;
- arl.sat = inst->Instruction.Saturate;
- arl.dst = convert_dst(c, &inst->Dst[0]);
- arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
-
- emit_inst(c, &arl);
-}
-
-static void
-trans_lrp(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- /* dst = src0 * src1 + (1 - src0) * src2
- * => src0 * src1 - (src0 - 1) * src2
- * => src0 * src1 - (src0 * src2 - src2)
- * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
- * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
- */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- if (etna_src_uniforms_conflict(src[0], src[1]) ||
- etna_src_uniforms_conflict(src[0], src[2])) {
- src[0] = etna_mov_src(c, src[0]);
- }
-
- struct etna_inst mad[2] = { };
- mad[0].opcode = INST_OPCODE_MAD;
- mad[0].sat = 0;
- mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W);
- mad[0].src[0] = src[0];
- mad[0].src[1] = src[2];
- mad[0].src[2] = negate(src[2]);
- mad[1].opcode = INST_OPCODE_MAD;
- mad[1].sat = inst->Instruction.Saturate;
- mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
- mad[1].src[1] = src[1];
- mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
-
- emit_inst(c, &mad[0]);
- emit_inst(c, &mad[1]);
-}
-
-static void
-trans_lit(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- /* SELECT.LT tmp._y__, 0, src.yyyy, 0
- * - can be eliminated if src.y is a uniform and >= 0
- * SELECT.GT tmp.___w, 128, src.wwww, 128
- * SELECT.LT tmp.___w, -128, tmp.wwww, -128
- * - can be eliminated if src.w is a uniform and fits clamp
- * LOG tmp.x, void, void, tmp.yyyy
- * MUL tmp.x, tmp.xxxx, tmp.wwww, void
- * LITP dst, undef, src.xxxx, tmp.xxxx
- */
- struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
- struct etna_inst_src src_y = { };
-
- if (!etna_rgroup_is_uniform(src[0].rgroup)) {
- src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));
-
- struct etna_inst ins = { };
- ins.opcode = INST_OPCODE_SELECT;
- ins.cond = INST_CONDITION_LT;
- ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
- ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
- ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
- emit_inst(c, &ins);
- } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
- src_y = alloc_imm_f32(c, 0.0);
- else
- src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
-
- struct etna_inst_src src_w = { };
-
- if (!etna_rgroup_is_uniform(src[0].rgroup)) {
- src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));
-
- struct etna_inst ins = { };
- ins.opcode = INST_OPCODE_SELECT;
- ins.cond = INST_CONDITION_GT;
- ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
- ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
- ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
- emit_inst(c, &ins);
- ins.cond = INST_CONDITION_LT;
- ins.src[0].neg = !ins.src[0].neg;
- ins.src[2].neg = !ins.src[2].neg;
- ins.src[1] = src_w;
- emit_inst(c, &ins);
- } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
- src_w = alloc_imm_f32(c, -128.);
- else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
- src_w = alloc_imm_f32(c, 128.);
- else
- src_w = swizzle(src[0], SWIZZLE(W, W, W, W));
-
- if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LOG,
- .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
- .src[2] = src_y,
- .tex = { .amode=1 }, /* Unknown bit needs to be set */
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
- .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
- .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
- });
- } else {
- struct etna_inst ins[3] = { };
- ins[0].opcode = INST_OPCODE_LOG;
- ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
- ins[0].src[2] = src_y;
-
- emit_inst(c, &ins[0]);
- }
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = 0,
- .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
- .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
- .src[1] = src_w,
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LITP,
- .sat = 0,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
- .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
- .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
- });
-}
-
-static void
-trans_ssg(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- if (c->specs->has_sign_floor_ceil) {
- emit_inst(c, &(struct etna_inst){
- .opcode = INST_OPCODE_SIGN,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[2] = src[0],
- });
- } else {
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- struct etna_inst ins[2] = { };
-
- ins[0].opcode = INST_OPCODE_SET;
- ins[0].cond = INST_CONDITION_NZ;
- ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W);
- ins[0].src[0] = src[0];
-
- ins[1].opcode = INST_OPCODE_SELECT;
- ins[1].cond = INST_CONDITION_LZ;
- ins[1].sat = inst->Instruction.Saturate;
- ins[1].dst = convert_dst(c, &inst->Dst[0]);
- ins[1].src[0] = src[0];
- ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
- ins[1].src[1] = negate(ins[1].src[2]);
-
- emit_inst(c, &ins[0]);
- emit_inst(c, &ins[1]);
- }
-}
-
-static void
-trans_trig(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
- /* On newer chips alternative SIN/COS instructions are implemented,
- * which:
- * - Need their input scaled by 1/pi instead of 2/pi
- * - Output an x and y component, which need to be multiplied to
- * get the result
- */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_Z),
- .src[0] = src[0], /* any swizzling happens here */
- .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
- ? INST_OPCODE_COS
- : INST_OPCODE_SIN,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
- .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
- .tex = { .amode=1 }, /* Unknown bit needs to be set */
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
- .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
- });
-
- } else if (c->specs->has_sin_cos_sqrt) {
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- /* add divide by PI/2, using a temp register. GC2000
- * fails with src==dst for the trig instruction. */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W),
- .src[0] = src[0], /* any swizzling happens here */
- .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
- ? INST_OPCODE_COS
- : INST_OPCODE_SIN,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
- });
- } else {
- /* Implement Nick's fast sine/cosine. Taken from:
- * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
- * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
- * MAD t.x_zw, src.xxxx, A, B
- * FRC t.x_z_, void, void, t.xwzw
- * MAD t.x_z_, t.xwzw, 2, -1
- * MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs)
- * DP3 t.x_z_, t.zyww, C, void (for sin)
- * DP3 t.__z_, t.zyww, C, void (for scs)
- * MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs)
- * DP3 t.x_z_, t.xyww, C, void (for cos)
- * DP3 t.x___, t.xyww, C, void (for scs)
- * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
- * MAD dst, t.ywyw, .2225, t.xzxz
- */
- struct etna_inst *p, ins[9] = { };
- struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
- struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
- struct etna_inst_src sincos[3], in = src[0];
- sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
- sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
-
- /* A uniform source will cause the inner temp limit to
- * be exceeded. Explicitly deal with that scenario.
- */
- if (etna_rgroup_is_uniform(src[0].rgroup)) {
- struct etna_inst ins = { };
- ins.opcode = INST_OPCODE_MOV;
- ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
- ins.src[2] = in;
- emit_inst(c, &ins);
- in = t0s;
- }
-
- ins[0].opcode = INST_OPCODE_MAD;
- ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
- ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
- ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
- ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
-
- ins[1].opcode = INST_OPCODE_FRC;
- ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
- ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
-
- ins[2].opcode = INST_OPCODE_MAD;
- ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
- ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
- ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
- ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
-
- unsigned mul_swiz, dp3_swiz;
- if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
- mul_swiz = SWIZZLE(W, Z, W, W);
- dp3_swiz = SWIZZLE(Z, Y, W, W);
- } else {
- mul_swiz = SWIZZLE(W, X, W, W);
- dp3_swiz = SWIZZLE(X, Y, W, W);
- }
-
- ins[3].opcode = INST_OPCODE_MUL;
- ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
- ins[3].src[0] = swizzle(t0s, mul_swiz);
- ins[3].src[1] = absolute(ins[3].src[0]);
-
- ins[4].opcode = INST_OPCODE_DP3;
- ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
- ins[4].src[0] = swizzle(t0s, dp3_swiz);
- ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
-
- p = &ins[5];
- p->opcode = INST_OPCODE_MAD;
- p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
- p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
- p->src[1] = absolute(p->src[0]);
- p->src[2] = negate(p->src[0]);
-
- p++;
- p->opcode = INST_OPCODE_MAD;
- p->sat = inst->Instruction.Saturate;
- p->dst = convert_dst(c, &inst->Dst[0]),
- p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
- p->src[1] = alloc_imm_f32(c, 0.2225);
- p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
-
- for (int i = 0; &ins[i] <= p; i++)
- emit_inst(c, &ins[i]);
- }
-}
-
-static void
-trans_lg2(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- if (c->specs->has_new_transcendentals) {
- /* On newer chips alternative LOG instruction is implemented,
- * which outputs an x and y component, which need to be multiplied to
- * get the result.
- */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LOG,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
- .src[2] = src[0],
- .tex = { .amode=1 }, /* Unknown bit needs to be set */
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
- .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
- });
- } else {
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_LOG,
- .sat = inst->Instruction.Saturate,
- .dst = convert_dst(c, &inst->Dst[0]),
- .src[2] = src[0],
- });
- }
-}
-
-static void
-trans_sampler(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst,
- struct etna_inst_src *src)
-{
- /* There is no native support for GL texture rectangle coordinates, so
- * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
- if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
- uint32_t unit = inst->Src[1].Register.Index;
- struct etna_inst ins[2] = { };
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
-
- ins[0].opcode = INST_OPCODE_MUL;
- ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
- ins[0].src[0] = src[0];
- ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
-
- ins[1].opcode = INST_OPCODE_MUL;
- ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
- ins[1].src[0] = src[0];
- ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
-
- emit_inst(c, &ins[0]);
- emit_inst(c, &ins[1]);
-
- src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
- }
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_TEX:
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_TEXLD,
- .sat = 0,
- .dst = convert_dst(c, &inst->Dst[0]),
- .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
- .src[0] = src[0],
- });
- break;
-
- case TGSI_OPCODE_TXB:
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_TEXLDB,
- .sat = 0,
- .dst = convert_dst(c, &inst->Dst[0]),
- .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
- .src[0] = src[0],
- });
- break;
-
- case TGSI_OPCODE_TXL:
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_TEXLDL,
- .sat = 0,
- .dst = convert_dst(c, &inst->Dst[0]),
- .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
- .src[0] = src[0],
- });
- break;
-
- case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
-
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_RCP,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
- .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .sat = 0,
- .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z), /* tmp.xyz */
- .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
- .src[1] = src[0], /* src.xyzw */
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_TEXLD,
- .sat = 0,
- .dst = convert_dst(c, &inst->Dst[0]),
- .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
- .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
- });
- } break;
-
- default:
- BUG("Unhandled instruction %s",
- tgsi_get_opcode_name(inst->Instruction.Opcode));
- assert(0);
- break;
- }
-}
-
-static void
-trans_dummy(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- /* nothing to do */
-}
-
-static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
-#define INSTR(n, f, ...) \
- [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
-
- INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
- INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
- INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
- INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
- INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
- INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
- INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
- INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
- INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
- INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
- INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
- INSTR(LG2, trans_lg2),
- INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
- INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
- INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
- INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
- INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
-
- INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
- INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
-
- INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
- INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
-
- INSTR(IF, trans_if),
- INSTR(ELSE, trans_else),
- INSTR(ENDIF, trans_endif),
-
- INSTR(BGNLOOP, trans_loop_bgn),
- INSTR(ENDLOOP, trans_loop_end),
- INSTR(BRK, trans_brk),
- INSTR(CONT, trans_cont),
-
- INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
- INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
-
- INSTR(ARL, trans_arl),
- INSTR(LRP, trans_lrp),
- INSTR(LIT, trans_lit),
- INSTR(SSG, trans_ssg),
-
- INSTR(SIN, trans_trig),
- INSTR(COS, trans_trig),
-
- INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
- INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
- INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
- INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
- INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
- INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
-
- INSTR(TEX, trans_sampler),
- INSTR(TXB, trans_sampler),
- INSTR(TXL, trans_sampler),
- INSTR(TXP, trans_sampler),
-
- INSTR(NOP, trans_dummy),
- INSTR(END, trans_dummy),
-};
-
-/* Pass -- compile instructions */
-static void
-etna_compile_pass_generate_code(struct etna_compile *c)
-{
- struct tgsi_parse_context ctx = { };
- ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
- assert(status == TGSI_PARSE_OK);
-
- int inst_idx = 0;
- while (!tgsi_parse_end_of_tokens(&ctx)) {
- const struct tgsi_full_instruction *inst = 0;
-
- /* No inner temps used yet for this instruction, clear counter */
- c->inner_temps = 0;
-
- tgsi_parse_token(&ctx);
-
- switch (ctx.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- /* iterate over operands */
- inst = &ctx.FullToken.FullInstruction;
- if (c->dead_inst[inst_idx]) { /* skip dead instructions */
- inst_idx++;
- continue;
- }
-
- /* Lookup the TGSI information and generate the source arguments */
- struct etna_inst_src src[ETNA_NUM_SRC];
- memset(src, 0, sizeof(src));
-
- const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
-
- for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
- const struct tgsi_full_src_register *reg = &inst->Src[i];
- const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
-
- if (!n->valid || n->is_tex)
- continue;
-
- src[i] = etna_create_src(reg, n);
- }
-
- const unsigned opc = inst->Instruction.Opcode;
- const struct instr_translater *t = &translaters[opc];
-
- if (t->fxn) {
- t->fxn(t, c, inst, src);
-
- inst_idx += 1;
- } else {
- BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
- assert(0);
- }
- break;
- }
- }
- tgsi_parse_free(&ctx);
-}
-
-/* Look up register by semantic */
-static struct etna_reg_desc *
-find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
-{
- for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[file].reg[idx];
-
- if (reg->semantic.Name == name && reg->semantic.Index == index)
- return reg;
- }
-
- return NULL; /* not found */
-}
-
-/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
- * - this is a vertex shader
- * - and this is an older GPU
- */
-static void
-etna_compile_add_z_div_if_needed(struct etna_compile *c)
-{
- if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
- /* find position out */
- struct etna_reg_desc *pos_reg =
- find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
-
- if (pos_reg != NULL) {
- /*
- * ADD tX.__z_, tX.zzzz, void, tX.wwww
- * MUL tX.__z_, tX.zzzz, 0.5, void
- */
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_ADD,
- .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
- .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
- .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
- });
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MUL,
- .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
- .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
- .src[1] = alloc_imm_f32(c, 0.5f),
- });
- }
- }
-}
-
-static void
-etna_compile_frag_rb_swap(struct etna_compile *c)
-{
- if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
- /* find color out */
- struct etna_reg_desc *color_reg =
- find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
-
- emit_inst(c, &(struct etna_inst) {
- .opcode = INST_OPCODE_MOV,
- .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
- .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
- });
- }
-}
-
-/** add a NOP to the shader if
- * a) the shader is empty
- * or
- * b) there is a label at the end of the shader
- */
-static void
-etna_compile_add_nop_if_needed(struct etna_compile *c)
-{
- bool label_at_last_inst = false;
-
- for (int idx = 0; idx < c->labels_count; ++idx) {
- if (c->labels[idx].inst_idx == c->inst_ptr)
- label_at_last_inst = true;
-
- }
-
- if (c->inst_ptr == 0 || label_at_last_inst)
- emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
-}
-
-static void
-assign_uniforms(struct etna_compile_file *file, unsigned base)
-{
- for (int idx = 0; idx < file->reg_size; ++idx) {
- file->reg[idx].native.valid = 1;
- file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
- file->reg[idx].native.id = base + idx;
- }
-}
-
-/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
- * CONST must be consecutive as const buffers are supposed to be consecutive,
- * and before IMM, as this is
- * more convenient because is possible for the compilation process itself to
- * generate extra
- * immediates for constants such as pi, one, zero.
- */
-static void
-assign_constants_and_immediates(struct etna_compile *c)
-{
- assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
- /* immediates start after the constants */
- c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
- assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
- DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
- c->imm_size);
-}
-
-/* Assign declared samplers to native texture units */
-static void
-assign_texture_units(struct etna_compile *c)
-{
- uint tex_base = 0;
-
- if (c->info.processor == PIPE_SHADER_VERTEX)
- tex_base = c->specs->vertex_sampler_offset;
-
- for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
- c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
- c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
- c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
- }
-}
-
-/* Additional pass to fill in branch targets. This pass should be last
- * as no instruction reordering or removing/addition can be done anymore
- * once the branch targets are computed.
- */
-static void
-etna_compile_fill_in_labels(struct etna_compile *c)
-{
- for (int idx = 0; idx < c->inst_ptr; ++idx) {
- if (c->lbl_usage[idx] != -1)
- etna_assemble_set_imm(&c->code[idx * 4],
- c->labels[c->lbl_usage[idx]].inst_idx);
- }
-}
-
-/* compare two etna_native_reg structures, return true if equal */
-static bool
-cmp_etna_native_reg(const struct etna_native_reg to,
- const struct etna_native_reg from)
-{
- return to.valid == from.valid && to.is_tex == from.is_tex &&
- to.rgroup == from.rgroup && to.id == from.id;
-}
-
-/* go through all declarations and swap native registers *to* and *from* */
-static void
-swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
- const struct etna_native_reg from)
-{
- if (cmp_etna_native_reg(from, to))
- return; /* Nothing to do */
-
- for (int idx = 0; idx < c->total_decls; ++idx) {
- if (cmp_etna_native_reg(c->decl[idx].native, from)) {
- c->decl[idx].native = to;
- } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
- c->decl[idx].native = from;
- }
- }
-}
-
-/* For PS we need to permute so that inputs are always in temporary 0..N-1.
- * Semantic POS is always t0. If that semantic is not used, avoid t0.
- */
-static void
-permute_ps_inputs(struct etna_compile *c)
-{
- /* Special inputs:
- * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
- * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
- */
- uint native_idx = 1;
-
- for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
- uint input_id;
- assert(reg->has_semantic);
-
- if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
- continue;
-
- input_id = native_idx++;
- swap_native_registers(c, etna_native_temp(input_id),
- c->file[TGSI_FILE_INPUT].reg[idx].native);
- }
-
- c->num_varyings = native_idx - 1;
-
- if (native_idx > c->next_free_native)
- c->next_free_native = native_idx;
-}
-
-/* fill in ps inputs into shader object */
-static void
-fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
-{
- struct etna_shader_io_file *sf = &sobj->infile;
-
- sf->num_reg = 0;
-
- for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
-
- if (reg->native.id > 0) {
- assert(sf->num_reg < ETNA_NUM_INPUTS);
- sf->reg[sf->num_reg].reg = reg->native.id;
- sf->reg[sf->num_reg].semantic = reg->semantic;
- /* convert usage mask to number of components (*=wildcard)
- * .r (0..1) -> 1 component
- * .*g (2..3) -> 2 component
- * .**b (4..7) -> 3 components
- * .***a (8..15) -> 4 components
- */
- sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
- sf->num_reg++;
- }
- }
-
- assert(sf->num_reg == c->num_varyings);
- sobj->input_count_unk8 = 31; /* XXX what is this */
-}
-
-/* fill in output mapping for ps into shader object */
-static void
-fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
-{
- sobj->outfile.num_reg = 0;
-
- for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
-
- switch (reg->semantic.Name) {
- case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
- sobj->ps_color_out_reg = reg->native.id;
- break;
- case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
- sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
- break;
- default:
- assert(0); /* only outputs supported are COLOR and POSITION at the moment */
- }
- }
-}
-
-/* fill in inputs for vs into shader object */
-static void
-fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
-{
- struct etna_shader_io_file *sf = &sobj->infile;
-
- sf->num_reg = 0;
- for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
- assert(sf->num_reg < ETNA_NUM_INPUTS);
-
- if (!reg->native.valid)
- continue;
-
- /* XXX exclude inputs with special semantics such as gl_frontFacing */
- sf->reg[sf->num_reg].reg = reg->native.id;
- sf->reg[sf->num_reg].semantic = reg->semantic;
- sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
- sf->num_reg++;
- }
-
- sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
-}
-
-/* build two-level output index [Semantic][Index] for fast linking */
-static void
-build_output_index(struct etna_shader_variant *sobj)
-{
- int total = 0;
- int offset = 0;
-
- for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
- total += sobj->output_count_per_semantic[name];
-
- sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
-
- for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
- sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
- offset += sobj->output_count_per_semantic[name];
- }
-
- for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
- sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
- [sobj->outfile.reg[idx].semantic.Index] =
- &sobj->outfile.reg[idx];
- }
-}
-
-/* fill in outputs for vs into shader object */
-static void
-fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
-{
- struct etna_shader_io_file *sf = &sobj->outfile;
-
- sf->num_reg = 0;
- for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
- struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
- assert(sf->num_reg < ETNA_NUM_INPUTS);
-
- switch (reg->semantic.Name) {
- case TGSI_SEMANTIC_POSITION:
- sobj->vs_pos_out_reg = reg->native.id;
- break;
- case TGSI_SEMANTIC_PSIZE:
- sobj->vs_pointsize_out_reg = reg->native.id;
- break;
- default:
- sf->reg[sf->num_reg].reg = reg->native.id;
- sf->reg[sf->num_reg].semantic = reg->semantic;
- sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
- sf->num_reg++;
- sobj->output_count_per_semantic[reg->semantic.Name] =
- MAX2(reg->semantic.Index + 1,
- sobj->output_count_per_semantic[reg->semantic.Name]);
- }
- }
-
- /* build two-level index for linking */
- build_output_index(sobj);
-
- /* fill in "mystery meat" load balancing value. This value determines how
- * work is scheduled between VS and PS
- * in the unified shader architecture. More precisely, it is determined from
- * the number of VS outputs, as well as chip-specific
- * vertex output buffer size, vertex cache size, and the number of shader
- * cores.
- *
- * XXX this is a conservative estimate, the "optimal" value is only known for
- * sure at link time because some
- * outputs may be unused and thus unmapped. Then again, in the general use
- * case with GLSL the vertex and fragment
- * shaders are linked already before submitting to Gallium, thus all outputs
- * are used.
- */
- int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
- assert(half_out);
-
- uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
- 2 * half_out * c->specs->vertex_cache_size)) +
- 9) /
- 10;
- uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
- sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
- VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
- VIVS_VS_LOAD_BALANCING_C(0x3f) |
- VIVS_VS_LOAD_BALANCING_D(0x0f);
-}
-
-static bool
-etna_compile_check_limits(struct etna_compile *c)
-{
- int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
- ? c->specs->max_vs_uniforms
- : c->specs->max_ps_uniforms;
- /* round up number of uniforms, including immediates, in units of four */
- int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
-
- if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
- DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
- c->specs->max_instructions);
- return false;
- }
-
- if (c->next_free_native > c->specs->max_registers) {
- DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
- c->specs->max_registers);
- return false;
- }
-
- if (num_uniforms > max_uniforms) {
- DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
- max_uniforms);
- return false;
- }
-
- if (c->num_varyings > c->specs->max_varyings) {
- DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
- c->specs->max_varyings);
- return false;
- }
-
- if (c->imm_base > c->specs->num_constants) {
- DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
- c->specs->num_constants);
- }
-
- return true;
-}
-
-static void
-copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
-{
- uint32_t count = c->imm_base + c->imm_size;
- struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
-
- uinfo->imm_count = count;
-
- uinfo->imm_data = malloc(count * sizeof(*c->imm_data));
- for (unsigned i = 0; i < c->imm_base; i++)
- uinfo->imm_data[i] = i;
- memcpy(&uinfo->imm_data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));
-
- uinfo->imm_contents = malloc(count * sizeof(*c->imm_contents));
- for (unsigned i = 0; i < c->imm_base; i++)
- uinfo->imm_contents[i] = ETNA_IMMEDIATE_UNIFORM;
- memcpy(&uinfo->imm_contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));
-
- etna_set_shader_uniforms_dirty_flags(sobj);
-}
-
-bool
-etna_compile_shader(struct etna_shader_variant *v)
-{
- if (DBG_ENABLED(ETNA_DBG_NIR))
- return etna_compile_shader_nir(v);
-
- /* Create scratch space that may be too large to fit on stack
- */
- bool ret;
- struct etna_compile *c;
-
- if (unlikely(!v))
- return false;
-
- const struct etna_specs *specs = v->shader->specs;
-
- struct tgsi_lowering_config lconfig = {
- .lower_FLR = !specs->has_sign_floor_ceil,
- .lower_CEIL = !specs->has_sign_floor_ceil,
- .lower_POW = true,
- .lower_EXP = true,
- .lower_LOG = true,
- .lower_DP2 = !specs->has_halti2_instructions,
- .lower_TRUNC = true,
- };
-
- c = CALLOC_STRUCT(etna_compile);
- if (!c)
- return false;
-
- memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
-
- const struct tgsi_token *tokens = v->shader->tokens;
-
- c->specs = specs;
- c->key = &v->key;
- c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
- c->free_tokens = !!c->tokens;
- if (!c->tokens) {
- /* no lowering */
- c->tokens = tokens;
- }
-
- /* Build a map from gallium register to native registers for files
- * CONST, SAMP, IMM, OUT, IN, TEMP.
- * SAMP will map as-is for fragment shaders, there will be a +8 offset for
- * vertex shaders.
- */
- /* Pass one -- check register file declarations and immediates */
- etna_compile_parse_declarations(c);
-
- etna_allocate_decls(c);
-
- /* Pass two -- check usage of temporaries, inputs, outputs */
- etna_compile_pass_check_usage(c);
-
- assign_special_inputs(c);
-
- /* Assign native temp register to TEMPs */
- assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
-
- /* optimize outputs */
- etna_compile_pass_optimize_outputs(c);
-
- /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
- * this is part of RGROUP_INTERNAL
- */
-
- /* assign inputs: last usage of input should be <= first usage of temp */
- /* potential optimization case:
- * if single MOV TEMP[y], IN[x] before which temp y is not used, and
- * after which IN[x]
- * is not read, temp[y] can be used as input register as-is
- */
- /* sort temporaries by first use
- * sort inputs by last usage
- * iterate over inputs, temporaries
- * if last usage of input <= first usage of temp:
- * assign input to temp
- * advance input, temporary pointer
- * else
- * advance temporary pointer
- *
- * potential problem: instruction with multiple inputs of which one is the
- * temp and the other is the input;
- * however, as the temp is not used before this, how would this make
- * sense? uninitialized temporaries have an undefined
- * value, so this would be ok
- */
- assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
-
- /* assign outputs: first usage of output should be >= last usage of temp */
- /* potential optimization case:
- * if single MOV OUT[x], TEMP[y] (with full write mask, or at least
- * writing all components that are used in
- * the shader) after which temp y is no longer used temp[y] can be
- * used as output register as-is
- *
- * potential problem: instruction with multiple outputs of which one is the
- * temp and the other is the output;
- * however, as the temp is not used after this, how would this make
- * sense? could just discard the output value
- */
- /* sort temporaries by last use
- * sort outputs by first usage
- * iterate over outputs, temporaries
- * if first usage of output >= last usage of temp:
- * assign output to temp
- * advance output, temporary pointer
- * else
- * advance temporary pointer
- */
- assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
-
- assign_constants_and_immediates(c);
- assign_texture_units(c);
-
- /* list declarations */
- for (int x = 0; x < c->total_decls; ++x) {
- DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
- "last_use=%i native=%i usage_mask=%x "
- "has_semantic=%i",
- x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
- c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
- c->decl[x].native.valid ? c->decl[x].native.id : -1,
- c->decl[x].usage_mask, c->decl[x].has_semantic);
- if (c->decl[x].has_semantic)
- DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
- tgsi_semantic_names[c->decl[x].semantic.Name],
- c->decl[x].semantic.Index);
- }
- /* XXX for PS we need to permute so that inputs are always in temporary
- * 0..N-1.
- * There is no "switchboard" for varyings (AFAIK!). The output color,
- * however, can be routed
- * from an arbitrary temporary.
- */
- if (c->info.processor == PIPE_SHADER_FRAGMENT)
- permute_ps_inputs(c);
-
-
- /* list declarations */
- for (int x = 0; x < c->total_decls; ++x) {
- DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
- "last_use=%i native=%i usage_mask=%x "
- "has_semantic=%i",
- x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
- c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
- c->decl[x].native.valid ? c->decl[x].native.id : -1,
- c->decl[x].usage_mask, c->decl[x].has_semantic);
- if (c->decl[x].has_semantic)
- DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
- tgsi_semantic_names[c->decl[x].semantic.Name],
- c->decl[x].semantic.Index);
- }
-
- /* pass 3: generate instructions */
- etna_compile_pass_generate_code(c);
- etna_compile_add_z_div_if_needed(c);
- etna_compile_frag_rb_swap(c);
- etna_compile_add_nop_if_needed(c);
-
- ret = etna_compile_check_limits(c);
- if (!ret)
- goto out;
-
- etna_compile_fill_in_labels(c);
-
- /* fill in output structure */
- v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
- v->code_size = c->inst_ptr * 4;
- v->code = mem_dup(c->code, c->inst_ptr * 16);
- v->num_loops = c->num_loops;
- v->num_temps = c->next_free_native;
- v->vs_id_in_reg = -1;
- v->vs_pos_out_reg = -1;
- v->vs_pointsize_out_reg = -1;
- v->ps_color_out_reg = -1;
- v->ps_depth_out_reg = -1;
- v->needs_icache = c->inst_ptr > c->specs->max_instructions;
- copy_uniform_state_to_shader(c, v);
-
- if (c->info.processor == PIPE_SHADER_VERTEX) {
- fill_in_vs_inputs(v, c);
- fill_in_vs_outputs(v, c);
- } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
- fill_in_ps_inputs(v, c);
- fill_in_ps_outputs(v, c);
- }
-
-out:
- if (c->free_tokens)
- FREE((void *)c->tokens);
-
- FREE(c->labels);
- FREE(c);
-
- return ret;
-}
-
-extern const char *tgsi_swizzle_names[];
-void
-etna_dump_shader(const struct etna_shader_variant *shader)
-{
- if (shader->stage == MESA_SHADER_VERTEX)
- printf("VERT\n");
- else
- printf("FRAG\n");
-
-
- etna_disasm(shader->code, shader->code_size, PRINT_RAW);
-
- printf("num loops: %i\n", shader->num_loops);
- printf("num temps: %i\n", shader->num_temps);
- printf("immediates:\n");
- for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
- printf(" [%i].%s = %f (0x%08x) (%d)\n",
- idx / 4,
- tgsi_swizzle_names[idx % 4],
- *((float *)&shader->uniforms.imm_data[idx]),
- shader->uniforms.imm_data[idx],
- shader->uniforms.imm_contents[idx]);
- }
-
- if (DBG_ENABLED(ETNA_DBG_NIR)) {
- printf("inputs:\n");
- for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
- printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg,
- (shader->stage == MESA_SHADER_VERTEX) ?
- gl_vert_attrib_name(shader->infile.reg[idx].slot) :
- gl_varying_slot_name(shader->infile.reg[idx].slot),
- shader->infile.reg[idx].num_components);
- }
- printf("outputs:\n");
- for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
- printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg,
- (shader->stage == MESA_SHADER_VERTEX) ?
- gl_varying_slot_name(shader->outfile.reg[idx].slot) :
- gl_frag_result_name(shader->outfile.reg[idx].slot),
- shader->outfile.reg[idx].num_components);
- }
- } else {
- printf("inputs:\n");
- for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
- printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
- tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
- shader->infile.reg[idx].semantic.Index,
- shader->infile.reg[idx].num_components);
- }
- printf("outputs:\n");
- for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
- printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
- tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
- shader->outfile.reg[idx].semantic.Index,
- shader->outfile.reg[idx].num_components);
- }
- }
- printf("special:\n");
- if (shader->stage == MESA_SHADER_VERTEX) {
- printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
- printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
- printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
- } else {
- printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
- printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
- }
- printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
-}
-
-void
-etna_destroy_shader(struct etna_shader_variant *shader)
-{
- if (DBG_ENABLED(ETNA_DBG_NIR))
- return etna_destroy_shader_nir(shader);
-
- assert(shader);
-
- FREE(shader->code);
- FREE(shader->uniforms.imm_data);
- FREE(shader->uniforms.imm_contents);
- FREE(shader->output_per_semantic_list);
- FREE(shader);
-}
-
-static const struct etna_shader_inout *
-etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
- const struct etna_shader_inout *in)
-{
- if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
- return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
-
- return NULL;
-}
-
-bool
-etna_link_shader(struct etna_shader_link_info *info,
- const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
-{
- if (DBG_ENABLED(ETNA_DBG_NIR))
- return etna_link_shader_nir(info, vs, fs);
-
- int comp_ofs = 0;
- /* For each fragment input we need to find the associated vertex shader
- * output, which can be found by matching on semantic name and index. A
- * binary search could be used because the vs outputs are sorted by their
- * semantic index and grouped by semantic type by fill_in_vs_outputs.
- */
- assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
- info->pcoord_varying_comp_ofs = -1;
-
- for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
- const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
- const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
- struct etna_varying *varying;
- bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;
-
- assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
-
- if (fsio->reg > info->num_varyings)
- info->num_varyings = fsio->reg;
-
- varying = &info->varyings[fsio->reg - 1];
- varying->num_components = fsio->num_components;
-
- if (!interpolate_always) /* colors affected by flat shading */
- varying->pa_attributes = 0x200;
- else /* texture coord or other bypasses flat shading */
- varying->pa_attributes = 0x2f1;
-
- varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
- varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
- varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
- varying->use[3] = VARYING_COMPONENT_USE_UNUSED;
-
- /* point coord is an input to the PS without matching VS output,
- * so it gets a varying slot without being assigned a VS register.
- */
- if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
- varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
- varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
-
- info->pcoord_varying_comp_ofs = comp_ofs;
- } else {
- if (vsio == NULL) { /* not found -- link error */
- BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
- return true;
- }
-
- varying->reg = vsio->reg;
- }
-
- comp_ofs += varying->num_components;
- }
-
- assert(info->num_varyings == fs->infile.num_reg);
-
- return false;
-}
--- /dev/null
+/*
+ * Copyright (c) 2012-2015 Etnaviv Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wladimir J. van der Laan <laanwj@gmail.com>
+ */
+
+/* TGSI->Vivante shader ISA conversion */
+
+/* What does the compiler return (see etna_shader_object)?
+ * 1) instruction data
+ * 2) input-to-temporary mapping (fixed for ps)
+ * *) in case of ps, semantic -> varying id mapping
+ * *) for each varying: number of components used (r, rg, rgb, rgba)
+ * 3) temporary-to-output mapping (in case of vs, fixed for ps)
+ * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
+ * 5) immediates base offset, immediates data
+ * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
+ * configure the hw, but useful for error checking
+ * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
+ * (output reg id is enough)
+ *
+ * Empty shaders are not allowed, should always at least generate a NOP. Also
+ * if there is a label at the end of the shader, an extra NOP should be
+ * generated as jump target.
+ *
+ * TODO
+ * * Use an instruction scheduler
+ * * Indirect access to uniforms / temporaries using amode
+ */
+
+#include "etnaviv_compiler.h"
+
+#include "etnaviv_asm.h"
+#include "etnaviv_context.h"
+#include "etnaviv_debug.h"
+#include "etnaviv_disasm.h"
+#include "etnaviv_uniforms.h"
+#include "etnaviv_util.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_iterate.h"
+#include "tgsi/tgsi_lowering.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define ETNA_MAX_INNER_TEMPS 2
+
+static const float sincos_const[2][4] = {
+ {
+ 2., -1., 4., -4.,
+ },
+ {
+ 1. / (2. * M_PI), 0.75, 0.5, 0.0,
+ },
+};
+
+/* Native register description structure */
+struct etna_native_reg {
+ unsigned valid : 1;
+ unsigned is_tex : 1; /* is texture unit, overrides rgroup */
+ unsigned rgroup : 3;
+ unsigned id : 9;
+};
+
+/* Register description */
+struct etna_reg_desc {
+ enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
+ int idx; /* index into file */
+ bool active; /* used in program */
+ int first_use; /* instruction id of first use (scope begin) */
+ int last_use; /* instruction id of last use (scope end, inclusive) */
+
+ struct etna_native_reg native; /* native register to map to */
+ unsigned usage_mask : 4; /* usage, per channel */
+ bool has_semantic; /* register has associated TGSI semantic */
+ struct tgsi_declaration_semantic semantic; /* TGSI semantic */
+ struct tgsi_declaration_interp interp; /* Interpolation type */
+};
+
+/* Label information structure */
+struct etna_compile_label {
+ int inst_idx; /* Instruction id that label points to */
+};
+
+enum etna_compile_frame_type {
+ ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
+ ETNA_COMPILE_FRAME_LOOP,
+};
+
+/* nesting scope frame (LOOP, IF, ...) during compilation
+ */
+struct etna_compile_frame {
+ enum etna_compile_frame_type type;
+ int lbl_else_idx;
+ int lbl_endif_idx;
+ int lbl_loop_bgn_idx;
+ int lbl_loop_end_idx;
+};
+
+struct etna_compile_file {
+ /* Number of registers in each TGSI file (max register+1) */
+ size_t reg_size;
+ /* Register descriptions, per register index */
+ struct etna_reg_desc *reg;
+};
+
+#define array_insert(arr, val) \
+ do { \
+ if (arr##_count == arr##_sz) { \
+ arr##_sz = MAX2(2 * arr##_sz, 16); \
+ arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
+ } \
+ arr[arr##_count++] = val; \
+ } while (0)
+
+
+/* scratch area for compiling shader, freed after compilation finishes */
+struct etna_compile {
+ const struct tgsi_token *tokens;
+ bool free_tokens;
+
+ struct tgsi_shader_info info;
+
+ /* Register descriptions, per TGSI file, per register index */
+ struct etna_compile_file file[TGSI_FILE_COUNT];
+
+ /* Keep track of TGSI register declarations */
+ struct etna_reg_desc decl[ETNA_MAX_DECL];
+ uint total_decls;
+
+ /* Bitmap of dead instructions which are removed in a separate pass */
+ bool dead_inst[ETNA_MAX_TOKENS];
+
+ /* Immediate data */
+ enum etna_immediate_contents imm_contents[ETNA_MAX_IMM];
+ uint32_t imm_data[ETNA_MAX_IMM];
+ uint32_t imm_base; /* base of immediates (in 32 bit units) */
+ uint32_t imm_size; /* size of immediates (in 32 bit units) */
+
+ /* Next free native register, for register allocation */
+ uint32_t next_free_native;
+
+ /* Temporary register for use within translated TGSI instruction,
+ * only allocated when needed.
+ */
+ int inner_temps; /* number of inner temps used; only up to one available at
+ this point */
+ struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];
+
+ /* Fields for handling nested conditionals */
+ struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
+ int frame_sp;
+ int lbl_usage[ETNA_MAX_INSTRUCTIONS];
+
+ unsigned labels_count, labels_sz;
+ struct etna_compile_label *labels;
+
+ unsigned num_loops;
+
+ /* Code generation */
+ int inst_ptr; /* current instruction pointer */
+ uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];
+
+ /* I/O */
+
+ /* Number of varyings (PS only) */
+ int num_varyings;
+
+ /* GPU hardware specs */
+ const struct etna_specs *specs;
+
+ const struct etna_shader_key *key;
+};
+
+static struct etna_reg_desc *
+etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
+{
+ return &c->file[dst.File].reg[dst.Index];
+}
+
+static struct etna_reg_desc *
+etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
+{
+ return &c->file[src.File].reg[src.Index];
+}
+
+static struct etna_native_reg
+etna_native_temp(unsigned reg)
+{
+ return (struct etna_native_reg) {
+ .valid = 1,
+ .rgroup = INST_RGROUP_TEMP,
+ .id = reg
+ };
+}
+
+/** Register allocation **/
+enum reg_sort_order {
+ FIRST_USE_ASC,
+ FIRST_USE_DESC,
+ LAST_USE_ASC,
+ LAST_USE_DESC
+};
+
+/* Augmented register description for sorting */
+struct sort_rec {
+ struct etna_reg_desc *ptr;
+ int key;
+};
+
+static int
+sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
+{
+ if (a->key < b->key)
+ return -1;
+
+ if (a->key > b->key)
+ return 1;
+
+ return 0;
+}
+
+/* create an index on a register set based on certain criteria. */
+static int
+sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
+ enum reg_sort_order so)
+{
+ struct etna_reg_desc *regs = file->reg;
+ int ptr = 0;
+
+ /* pre-populate keys from active registers */
+ for (int idx = 0; idx < file->reg_size; ++idx) {
+ /* only interested in active registers now; will only assign inactive ones
+ * if no space in active ones */
+ if (regs[idx].active) {
+ sorted[ptr].ptr = ®s[idx];
+
+ switch (so) {
+ case FIRST_USE_ASC:
+ sorted[ptr].key = regs[idx].first_use;
+ break;
+ case LAST_USE_ASC:
+ sorted[ptr].key = regs[idx].last_use;
+ break;
+ case FIRST_USE_DESC:
+ sorted[ptr].key = -regs[idx].first_use;
+ break;
+ case LAST_USE_DESC:
+ sorted[ptr].key = -regs[idx].last_use;
+ break;
+ }
+ ptr++;
+ }
+ }
+
+ /* sort index by key */
+ qsort(sorted, ptr, sizeof(struct sort_rec),
+ (int (*)(const void *, const void *))sort_rec_compar);
+
+ return ptr;
+}
+
+/* Allocate a new, unused, native temp register */
+static struct etna_native_reg
+alloc_new_native_reg(struct etna_compile *c)
+{
+ assert(c->next_free_native < ETNA_MAX_TEMPS);
+ return etna_native_temp(c->next_free_native++);
+}
+
+/* assign TEMPs to native registers */
+static void
+assign_temporaries_to_native(struct etna_compile *c,
+ struct etna_compile_file *file)
+{
+ struct etna_reg_desc *temps = file->reg;
+
+ for (int idx = 0; idx < file->reg_size; ++idx)
+ temps[idx].native = alloc_new_native_reg(c);
+}
+
+/* assign inputs and outputs to temporaries
+ * Gallium assumes that the hardware has separate registers for taking input and
+ * output, however Vivante GPUs use temporaries both for passing in inputs and
+ * passing back outputs.
+ * Try to re-use temporary registers where possible. */
+static void
+assign_inouts_to_temporaries(struct etna_compile *c, uint file)
+{
+ bool mode_inputs = (file == TGSI_FILE_INPUT);
+ int inout_ptr = 0, num_inouts;
+ int temp_ptr = 0, num_temps;
+ struct sort_rec inout_order[ETNA_MAX_TEMPS];
+ struct sort_rec temps_order[ETNA_MAX_TEMPS];
+ num_inouts = sort_registers(inout_order, &c->file[file],
+ mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
+ num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
+ mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);
+
+ while (inout_ptr < num_inouts && temp_ptr < num_temps) {
+ struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
+ struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
+
+ if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
+ inout_ptr++;
+ continue;
+ }
+
+ /* last usage of this input is before or in same instruction of first use
+ * of temporary? */
+ if (mode_inputs ? (inout->last_use <= temp->first_use)
+ : (inout->first_use >= temp->last_use)) {
+ /* assign it and advance to next input */
+ inout->native = temp->native;
+ inout_ptr++;
+ }
+
+ temp_ptr++;
+ }
+
+ /* if we couldn't reuse current ones, allocate new temporaries */
+ for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
+ struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
+
+ if (inout->active && !inout->native.valid)
+ inout->native = alloc_new_native_reg(c);
+ }
+}
+
+/* Allocate an immediate with a certain value and return the index. If
+ * there is already an immediate with that value, return that.
+ */
+static struct etna_inst_src
+alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents,
+ uint32_t value)
+{
+ int idx;
+
+ /* Could use a hash table to speed this up */
+ for (idx = 0; idx < c->imm_size; ++idx) {
+ if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
+ break;
+ }
+
+ /* look if there is an unused slot */
+ if (idx == c->imm_size) {
+ for (idx = 0; idx < c->imm_size; ++idx) {
+ if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED)
+ break;
+ }
+ }
+
+ /* allocate new immediate */
+ if (idx == c->imm_size) {
+ assert(c->imm_size < ETNA_MAX_IMM);
+ idx = c->imm_size++;
+ c->imm_data[idx] = value;
+ c->imm_contents[idx] = contents;
+ }
+
+ /* swizzle so that component with value is returned in all components */
+ idx += c->imm_base;
+ struct etna_inst_src imm_src = {
+ .use = 1,
+ .rgroup = INST_RGROUP_UNIFORM_0,
+ .reg = idx / 4,
+ .swiz = INST_SWIZ_BROADCAST(idx & 3)
+ };
+
+ return imm_src;
+}
+
+static struct etna_inst_src
+alloc_imm_u32(struct etna_compile *c, uint32_t value)
+{
+ return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value);
+}
+
+static struct etna_inst_src
+alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents,
+ const uint32_t *values)
+{
+ struct etna_inst_src imm_src = { };
+ int idx, i;
+
+ for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
+ /* What if we can use a uniform with a different swizzle? */
+ for (i = 0; i < 4; i++)
+ if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
+ break;
+ if (i == 4)
+ break;
+ }
+
+ if (idx + 3 >= c->imm_size) {
+ idx = align(c->imm_size, 4);
+ assert(idx + 4 <= ETNA_MAX_IMM);
+
+ for (i = 0; i < 4; i++) {
+ c->imm_data[idx + i] = values[i];
+ c->imm_contents[idx + i] = contents;
+ }
+
+ c->imm_size = idx + 4;
+ }
+
+ assert((c->imm_base & 3) == 0);
+ idx += c->imm_base;
+ imm_src.use = 1;
+ imm_src.rgroup = INST_RGROUP_UNIFORM_0;
+ imm_src.reg = idx / 4;
+ imm_src.swiz = INST_SWIZ_IDENTITY;
+
+ return imm_src;
+}
+
+static uint32_t
+get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
+ unsigned swiz_idx)
+{
+ assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
+ unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
+
+ return c->imm_data[idx];
+}
+
+/* Allocate immediate with a certain float value. If there is already an
+ * immediate with that value, return that.
+ */
+static struct etna_inst_src
+alloc_imm_f32(struct etna_compile *c, float value)
+{
+ return alloc_imm_u32(c, fui(value));
+}
+
+static struct etna_inst_src
+etna_imm_vec4f(struct etna_compile *c, const float *vec4)
+{
+ uint32_t val[4];
+
+ for (int i = 0; i < 4; i++)
+ val[i] = fui(vec4[i]);
+
+ return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val);
+}
+
+/* Pass -- check register file declarations and immediates */
+static void
+etna_compile_parse_declarations(struct etna_compile *c)
+{
+ struct tgsi_parse_context ctx = { };
+ ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
+ assert(status == TGSI_PARSE_OK);
+
+ while (!tgsi_parse_end_of_tokens(&ctx)) {
+ tgsi_parse_token(&ctx);
+
+ switch (ctx.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE: {
+ /* immediates are handled differently from other files; they are
+ * not declared explicitly, and always add four components */
+ const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
+ assert(c->imm_size <= (ETNA_MAX_IMM - 4));
+
+ for (int i = 0; i < 4; ++i) {
+ unsigned idx = c->imm_size++;
+
+ c->imm_data[idx] = imm->u[i].Uint;
+ c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT;
+ }
+ }
+ break;
+ }
+ }
+
+ tgsi_parse_free(&ctx);
+}
+
+/* Allocate register declarations for the registers in all register files */
+static void
+etna_allocate_decls(struct etna_compile *c)
+{
+ uint idx = 0;
+
+ for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
+ c->file[x].reg = &c->decl[idx];
+ c->file[x].reg_size = c->info.file_max[x] + 1;
+
+ for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
+ c->decl[idx].file = x;
+ c->decl[idx].idx = sub;
+ idx++;
+ }
+ }
+
+ c->total_decls = idx;
+}
+
+/* Pass -- check and record usage of temporaries, inputs, outputs */
+static void
+etna_compile_pass_check_usage(struct etna_compile *c)
+{
+ struct tgsi_parse_context ctx = { };
+ ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
+ assert(status == TGSI_PARSE_OK);
+
+ for (int idx = 0; idx < c->total_decls; ++idx) {
+ c->decl[idx].active = false;
+ c->decl[idx].first_use = c->decl[idx].last_use = -1;
+ }
+
+ int inst_idx = 0;
+ while (!tgsi_parse_end_of_tokens(&ctx)) {
+ tgsi_parse_token(&ctx);
+ /* find out max register #s used
+ * For every register mark first and last instruction index where it's
+ * used this allows finding ranges where the temporary can be borrowed
+ * as input and/or output register
+ *
+ * XXX in the case of loops this needs special care, or even be completely
+ * disabled, as
+ * the last usage of a register inside a loop means it can still be used
+ * on next loop
+ * iteration (execution is no longer * chronological). The register can
+ * only be
+ * declared "free" after the loop finishes.
+ *
+ * Same for inputs: the first usage of a register inside a loop doesn't
+ * mean that the register
+ * won't have been overwritten in previous iteration. The register can
+ * only be declared free before the loop
+ * starts.
+ * The proper way would be to do full dominator / post-dominator analysis
+ * (especially with more complicated
+ * control flow such as direct branch instructions) but not for now...
+ */
+ switch (ctx.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION: {
+ /* Declaration: fill in file details */
+ const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
+ struct etna_compile_file *file = &c->file[decl->Declaration.File];
+
+ for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
+ file->reg[idx].usage_mask = 0; // we'll compute this ourselves
+ file->reg[idx].has_semantic = decl->Declaration.Semantic;
+ file->reg[idx].semantic = decl->Semantic;
+ file->reg[idx].interp = decl->Interp;
+ }
+ } break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ /* Instruction: iterate over operands of instruction */
+ const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
+
+ /* iterate over destination registers */
+ for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
+ struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];
+
+ if (reg_desc->first_use == -1)
+ reg_desc->first_use = inst_idx;
+
+ reg_desc->last_use = inst_idx;
+ reg_desc->active = true;
+ }
+
+ /* iterate over source registers */
+ for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
+ struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];
+
+ if (reg_desc->first_use == -1)
+ reg_desc->first_use = inst_idx;
+
+ reg_desc->last_use = inst_idx;
+ reg_desc->active = true;
+ /* accumulate usage mask for register, this is used to determine how
+ * many slots for varyings
+ * should be allocated */
+ reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
+ }
+ inst_idx += 1;
+ } break;
+ default:
+ break;
+ }
+ }
+
+ tgsi_parse_free(&ctx);
+}
+
+/* assign inputs that need to be assigned to specific registers */
+static void
+assign_special_inputs(struct etna_compile *c)
+{
+ if (c->info.processor == PIPE_SHADER_FRAGMENT) {
+ /* never assign t0 as it is the position output, start assigning at t1 */
+ c->next_free_native = 1;
+
+ /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
+ for (int idx = 0; idx < c->total_decls; ++idx) {
+ struct etna_reg_desc *reg = &c->decl[idx];
+
+ if (reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
+ reg->native = etna_native_temp(0);
+ }
+ }
+}
+
+/* Check that a move instruction does not swizzle any of the components
+ * that it writes.
+ */
+static bool
+etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
+ const struct tgsi_src_register src)
+{
+ return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
+ (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
+ (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
+ (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
+}
+
+/* Pass -- optimize outputs
+ * Mesa tends to generate code like this at the end if their shaders
+ * MOV OUT[1], TEMP[2]
+ * MOV OUT[0], TEMP[0]
+ * MOV OUT[2], TEMP[1]
+ * Recognize if
+ * a) there is only a single assignment to an output register and
+ * b) the temporary is not used after that
+ * Also recognize direct assignment of IN to OUT (passthrough)
+ **/
+static void
+etna_compile_pass_optimize_outputs(struct etna_compile *c)
+{
+ struct tgsi_parse_context ctx = { };
+ int inst_idx = 0;
+ ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
+ assert(status == TGSI_PARSE_OK);
+
+ while (!tgsi_parse_end_of_tokens(&ctx)) {
+ tgsi_parse_token(&ctx);
+
+ switch (ctx.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION: {
+ const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
+
+ /* iterate over operands */
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MOV: {
+ /* We are only interested in eliminating MOVs which write to
+ * the shader outputs. Test for this early. */
+ if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
+ break;
+ /* Elimination of a MOV must have no visible effect on the
+ * resulting shader: this means the MOV must not swizzle or
+ * saturate, and its source must not have the negate or
+ * absolute modifiers. */
+ if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
+ inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
+ inst->Src[0].Register.Absolute)
+ break;
+
+ uint out_idx = inst->Dst[0].Register.Index;
+ uint in_idx = inst->Src[0].Register.Index;
+ /* assignment of temporary to output --
+ * and the output doesn't yet have a native register assigned
+ * and the last use of the temporary is this instruction
+ * and the MOV does not do a swizzle
+ */
+ if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
+ !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
+ c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
+ c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
+ c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
+ /* prevent temp from being re-used for the rest of the shader */
+ c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
+ /* mark this MOV instruction as a no-op */
+ c->dead_inst[inst_idx] = true;
+ }
+ /* direct assignment of input to output --
+ * and the input or output doesn't yet have a native register
+ * assigned
+ * and the output is only used in this instruction,
+ * allocate a new register, and associate both input and output to
+ * it
+ * and the MOV does not do a swizzle
+ */
+ if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
+ !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
+ !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
+ c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
+ c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
+ c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
+ c->file[TGSI_FILE_INPUT].reg[in_idx].native =
+ alloc_new_native_reg(c);
+ /* mark this MOV instruction as a no-op */
+ c->dead_inst[inst_idx] = true;
+ }
+ } break;
+ default:;
+ }
+ inst_idx += 1;
+ } break;
+ }
+ }
+
+ tgsi_parse_free(&ctx);
+}
+
+/* Get a temporary to be used within one TGSI instruction.
+ * The first time that this function is called the temporary will be allocated.
+ * Each call to this function will return the same temporary.
+ */
+static struct etna_native_reg
+etna_compile_get_inner_temp(struct etna_compile *c)
+{
+ int inner_temp = c->inner_temps;
+
+ if (inner_temp < ETNA_MAX_INNER_TEMPS) {
+ if (!c->inner_temp[inner_temp].valid)
+ c->inner_temp[inner_temp] = alloc_new_native_reg(c);
+
+ /* alloc_new_native_reg() handles lack of registers */
+ c->inner_temps += 1;
+ } else {
+ BUG("Too many inner temporaries (%i) requested in one instruction",
+ inner_temp + 1);
+ }
+
+ return c->inner_temp[inner_temp];
+}
+
+static struct etna_inst_dst
+etna_native_to_dst(struct etna_native_reg native, unsigned comps)
+{
+ /* Can only assign to temporaries */
+ assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
+
+ struct etna_inst_dst rv = {
+ .write_mask = comps,
+ .use = 1,
+ .reg = native.id,
+ };
+
+ return rv;
+}
+
+static struct etna_inst_src
+etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
+{
+ assert(native.valid && !native.is_tex);
+
+ struct etna_inst_src rv = {
+ .use = 1,
+ .swiz = swizzle,
+ .rgroup = native.rgroup,
+ .reg = native.id,
+ .amode = INST_AMODE_DIRECT,
+ };
+
+ return rv;
+}
+
+static inline struct etna_inst_src
+negate(struct etna_inst_src src)
+{
+ src.neg = !src.neg;
+
+ return src;
+}
+
+static inline struct etna_inst_src
+absolute(struct etna_inst_src src)
+{
+ src.abs = 1;
+
+ return src;
+}
+
+static inline struct etna_inst_src
+swizzle(struct etna_inst_src src, unsigned swizzle)
+{
+ src.swiz = inst_swiz_compose(src.swiz, swizzle);
+
+ return src;
+}
+
+/* Emit instruction and append it to program */
+static void
+emit_inst(struct etna_compile *c, struct etna_inst *inst)
+{
+ assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
+
+ /* Check for uniform conflicts (each instruction can only access one
+ * uniform),
+ * if detected, use an intermediate temporary */
+ unsigned uni_rgroup = -1;
+ unsigned uni_reg = -1;
+
+ for (int src = 0; src < ETNA_NUM_SRC; ++src) {
+ if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
+ if (uni_reg == -1) { /* first unique uniform used */
+ uni_rgroup = inst->src[src].rgroup;
+ uni_reg = inst->src[src].reg;
+ } else { /* second or later; check that it is a re-use */
+ if (uni_rgroup != inst->src[src].rgroup ||
+ uni_reg != inst->src[src].reg) {
+ DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
+ "accesses different uniforms, "
+ "need to generate extra MOV");
+ struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
+
+ /* Generate move instruction to temporary */
+ etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
+ .opcode = INST_OPCODE_MOV,
+ .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z | INST_COMPS_W),
+ .src[2] = inst->src[src]
+ });
+
+ c->inst_ptr++;
+
+ /* Modify instruction to use temp register instead of uniform */
+ inst->src[src].use = 1;
+ inst->src[src].rgroup = INST_RGROUP_TEMP;
+ inst->src[src].reg = inner_temp.id;
+ inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
+ inst->src[src].neg = 0; /* negation happens on MOV */
+ inst->src[src].abs = 0; /* abs happens on MOV */
+ inst->src[src].amode = 0; /* amode effects happen on MOV */
+ }
+ }
+ }
+ }
+
+ /* Finally assemble the actual instruction */
+ etna_assemble(&c->code[c->inst_ptr * 4], inst);
+ c->inst_ptr++;
+}
+
+static unsigned int
+etna_amode(struct tgsi_ind_register indirect)
+{
+ assert(indirect.File == TGSI_FILE_ADDRESS);
+ assert(indirect.Index == 0);
+
+ switch (indirect.Swizzle) {
+ case TGSI_SWIZZLE_X:
+ return INST_AMODE_ADD_A_X;
+ case TGSI_SWIZZLE_Y:
+ return INST_AMODE_ADD_A_Y;
+ case TGSI_SWIZZLE_Z:
+ return INST_AMODE_ADD_A_Z;
+ case TGSI_SWIZZLE_W:
+ return INST_AMODE_ADD_A_W;
+ default:
+ assert(!"Invalid swizzle");
+ }
+
+ unreachable("bad swizzle");
+}
+
+/* convert destination operand */
+static struct etna_inst_dst
+convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
+{
+ struct etna_inst_dst rv = {
+ /// XXX .amode
+ .write_mask = in->Register.WriteMask,
+ };
+
+ if (in->Register.File == TGSI_FILE_ADDRESS) {
+ assert(in->Register.Index == 0);
+ rv.reg = in->Register.Index;
+ rv.use = 0;
+ } else {
+ rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
+ in->Register.WriteMask);
+ }
+
+ if (in->Register.Indirect)
+ rv.amode = etna_amode(in->Indirect);
+
+ return rv;
+}
+
+/* convert texture operand */
+static struct etna_inst_tex
+convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
+ const struct tgsi_instruction_texture *tex)
+{
+ struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
+ struct etna_inst_tex rv = {
+ // XXX .amode (to allow for an array of samplers?)
+ .swiz = INST_SWIZ_IDENTITY
+ };
+
+ assert(native_reg.is_tex && native_reg.valid);
+ rv.id = native_reg.id;
+
+ return rv;
+}
+
+/* convert source operand */
+static struct etna_inst_src
+etna_create_src(const struct tgsi_full_src_register *tgsi,
+ const struct etna_native_reg *native)
+{
+ const struct tgsi_src_register *reg = &tgsi->Register;
+ struct etna_inst_src rv = {
+ .use = 1,
+ .swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
+ .neg = reg->Negate,
+ .abs = reg->Absolute,
+ .rgroup = native->rgroup,
+ .reg = native->id,
+ .amode = INST_AMODE_DIRECT,
+ };
+
+ assert(native->valid && !native->is_tex);
+
+ if (reg->Indirect)
+ rv.amode = etna_amode(tgsi->Indirect);
+
+ return rv;
+}
+
+static struct etna_inst_src
+etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
+ struct etna_native_reg temp)
+{
+ struct etna_inst mov = { };
+
+ mov.opcode = INST_OPCODE_MOV;
+ mov.sat = 0;
+ mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z | INST_COMPS_W);
+ mov.src[2] = src;
+ emit_inst(c, &mov);
+
+ src.swiz = INST_SWIZ_IDENTITY;
+ src.neg = src.abs = 0;
+ src.rgroup = temp.rgroup;
+ src.reg = temp.id;
+
+ return src;
+}
+
+static struct etna_inst_src
+etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
+{
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+
+ return etna_mov_src_to_temp(c, src, temp);
+}
+
+static bool
+etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
+{
+ return etna_rgroup_is_uniform(a.rgroup) &&
+ etna_rgroup_is_uniform(b.rgroup) &&
+ (a.rgroup != b.rgroup || a.reg != b.reg);
+}
+
+/* create a new label */
+static unsigned int
+alloc_new_label(struct etna_compile *c)
+{
+ struct etna_compile_label label = {
+ .inst_idx = -1, /* start by point to no specific instruction */
+ };
+
+ array_insert(c->labels, label);
+
+ return c->labels_count - 1;
+}
+
+/* place label at current instruction pointer */
+static void
+label_place(struct etna_compile *c, struct etna_compile_label *label)
+{
+ label->inst_idx = c->inst_ptr;
+}
+
+/* mark label use at current instruction.
+ * target of the label will be filled in in the marked instruction's src2.imm
+ * slot as soon
+ * as the value becomes known.
+ */
+static void
+label_mark_use(struct etna_compile *c, int lbl_idx)
+{
+ assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
+ c->lbl_usage[c->inst_ptr] = lbl_idx;
+}
+
+/* walk the frame stack and return first frame with matching type */
+static struct etna_compile_frame *
+find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
+{
+ for (int sp = c->frame_sp; sp >= 0; sp--)
+ if (c->frame_stack[sp].type == type)
+ return &c->frame_stack[sp];
+
+ assert(0);
+ return NULL;
+}
+
+struct instr_translater {
+ void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst,
+ struct etna_inst_src *src);
+ unsigned tgsi_opc;
+ uint8_t opc;
+
+ /* tgsi src -> etna src swizzle */
+ int src[3];
+
+ unsigned cond;
+};
+
+static void
+trans_instr(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
+ struct etna_inst instr = { };
+
+ instr.opcode = t->opc;
+ instr.cond = t->cond;
+ instr.sat = inst->Instruction.Saturate;
+
+ assert(info->num_dst <= 1);
+ if (info->num_dst)
+ instr.dst = convert_dst(c, &inst->Dst[0]);
+
+ assert(info->num_src <= ETNA_NUM_SRC);
+
+ for (unsigned i = 0; i < info->num_src; i++) {
+ int swizzle = t->src[i];
+
+ assert(swizzle != -1);
+ instr.src[swizzle] = src[i];
+ }
+
+ emit_inst(c, &instr);
+}
+
+static void
+trans_min_max(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst,
+ struct etna_inst_src *src)
+{
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_SELECT,
+ .cond = t->cond,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[0] = src[0],
+ .src[1] = src[1],
+ .src[2] = src[0],
+ });
+}
+
+static void
+trans_if(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
+ struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);
+
+ /* push IF to stack */
+ f->type = ETNA_COMPILE_FRAME_IF;
+ /* create "else" label */
+ f->lbl_else_idx = alloc_new_label(c);
+ f->lbl_endif_idx = -1;
+
+ /* We need to avoid the emit_inst() below becoming two instructions */
+ if (etna_src_uniforms_conflict(src[0], imm_0))
+ src[0] = etna_mov_src(c, src[0]);
+
+ /* mark position in instruction stream of label reference so that it can be
+ * filled in in next pass */
+ label_mark_use(c, f->lbl_else_idx);
+
+ /* create conditional branch to label if src0 EQ 0 */
+ emit_inst(c, &(struct etna_inst){
+ .opcode = INST_OPCODE_BRANCH,
+ .cond = INST_CONDITION_EQ,
+ .src[0] = src[0],
+ .src[1] = imm_0,
+ /* imm is filled in later */
+ });
+}
+
+static void
+trans_else(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ assert(c->frame_sp > 0);
+ struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
+ assert(f->type == ETNA_COMPILE_FRAME_IF);
+
+ /* create "endif" label, and branch to endif label */
+ f->lbl_endif_idx = alloc_new_label(c);
+ label_mark_use(c, f->lbl_endif_idx);
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_BRANCH,
+ .cond = INST_CONDITION_TRUE,
+ /* imm is filled in later */
+ });
+
+ /* mark "else" label at this position in instruction stream */
+ label_place(c, &c->labels[f->lbl_else_idx]);
+}
+
+static void
+trans_endif(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ assert(c->frame_sp > 0);
+ struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
+ assert(f->type == ETNA_COMPILE_FRAME_IF);
+
+ /* assign "endif" or "else" (if no ELSE) label to current position in
+ * instruction stream, pop IF */
+ if (f->lbl_endif_idx != -1)
+ label_place(c, &c->labels[f->lbl_endif_idx]);
+ else
+ label_place(c, &c->labels[f->lbl_else_idx]);
+}
+
+static void
+trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst,
+ struct etna_inst_src *src)
+{
+ struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
+
+ /* push LOOP to stack */
+ f->type = ETNA_COMPILE_FRAME_LOOP;
+ f->lbl_loop_bgn_idx = alloc_new_label(c);
+ f->lbl_loop_end_idx = alloc_new_label(c);
+
+ label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
+
+ c->num_loops++;
+}
+
+static void
+trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst,
+ struct etna_inst_src *src)
+{
+ assert(c->frame_sp > 0);
+ struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
+ assert(f->type == ETNA_COMPILE_FRAME_LOOP);
+
+ /* mark position in instruction stream of label reference so that it can be
+ * filled in in next pass */
+ label_mark_use(c, f->lbl_loop_bgn_idx);
+
+ /* create branch to loop_bgn label */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_BRANCH,
+ .cond = INST_CONDITION_TRUE,
+ .src[0] = src[0],
+ /* imm is filled in later */
+ });
+
+ label_place(c, &c->labels[f->lbl_loop_end_idx]);
+}
+
+static void
+trans_brk(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ assert(c->frame_sp > 0);
+ struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
+
+ /* mark position in instruction stream of label reference so that it can be
+ * filled in in next pass */
+ label_mark_use(c, f->lbl_loop_end_idx);
+
+ /* create branch to loop_end label */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_BRANCH,
+ .cond = INST_CONDITION_TRUE,
+ .src[0] = src[0],
+ /* imm is filled in later */
+ });
+}
+
+static void
+trans_cont(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ assert(c->frame_sp > 0);
+ struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
+
+ /* mark position in instruction stream of label reference so that it can be
+ * filled in in next pass */
+ label_mark_use(c, f->lbl_loop_bgn_idx);
+
+ /* create branch to loop_end label */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_BRANCH,
+ .cond = INST_CONDITION_TRUE,
+ .src[0] = src[0],
+ /* imm is filled in later */
+ });
+}
+
+static void
+trans_deriv(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = t->opc,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[0] = src[0],
+ .src[2] = src[0],
+ });
+}
+
+static void
+trans_arl(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+ struct etna_inst arl = { };
+ struct etna_inst_dst dst;
+
+ dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
+ INST_COMPS_W);
+
+ if (c->specs->has_sign_floor_ceil) {
+ struct etna_inst floor = { };
+
+ floor.opcode = INST_OPCODE_FLOOR;
+ floor.src[2] = src[0];
+ floor.dst = dst;
+
+ emit_inst(c, &floor);
+ } else {
+ struct etna_inst floor[2] = { };
+
+ floor[0].opcode = INST_OPCODE_FRC;
+ floor[0].sat = inst->Instruction.Saturate;
+ floor[0].dst = dst;
+ floor[0].src[2] = src[0];
+
+ floor[1].opcode = INST_OPCODE_ADD;
+ floor[1].sat = inst->Instruction.Saturate;
+ floor[1].dst = dst;
+ floor[1].src[0] = src[0];
+ floor[1].src[2].use = 1;
+ floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
+ floor[1].src[2].neg = 1;
+ floor[1].src[2].rgroup = temp.rgroup;
+ floor[1].src[2].reg = temp.id;
+
+ emit_inst(c, &floor[0]);
+ emit_inst(c, &floor[1]);
+ }
+
+ arl.opcode = INST_OPCODE_MOVAR;
+ arl.sat = inst->Instruction.Saturate;
+ arl.dst = convert_dst(c, &inst->Dst[0]);
+ arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
+
+ emit_inst(c, &arl);
+}
+
+static void
+trans_lrp(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ /* dst = src0 * src1 + (1 - src0) * src2
+ * => src0 * src1 - (src0 - 1) * src2
+ * => src0 * src1 - (src0 * src2 - src2)
+ * MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
+ * MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
+ */
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+ if (etna_src_uniforms_conflict(src[0], src[1]) ||
+ etna_src_uniforms_conflict(src[0], src[2])) {
+ src[0] = etna_mov_src(c, src[0]);
+ }
+
+ struct etna_inst mad[2] = { };
+ mad[0].opcode = INST_OPCODE_MAD;
+ mad[0].sat = 0;
+ mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z | INST_COMPS_W);
+ mad[0].src[0] = src[0];
+ mad[0].src[1] = src[2];
+ mad[0].src[2] = negate(src[2]);
+ mad[1].opcode = INST_OPCODE_MAD;
+ mad[1].sat = inst->Instruction.Saturate;
+ mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
+ mad[1].src[1] = src[1];
+ mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
+
+ emit_inst(c, &mad[0]);
+ emit_inst(c, &mad[1]);
+}
+
+static void
+trans_lit(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ /* SELECT.LT tmp._y__, 0, src.yyyy, 0
+ * - can be eliminated if src.y is a uniform and >= 0
+ * SELECT.GT tmp.___w, 128, src.wwww, 128
+ * SELECT.LT tmp.___w, -128, tmp.wwww, -128
+ * - can be eliminated if src.w is a uniform and fits clamp
+ * LOG tmp.x, void, void, tmp.yyyy
+ * MUL tmp.x, tmp.xxxx, tmp.wwww, void
+ * LITP dst, undef, src.xxxx, tmp.xxxx
+ */
+ struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
+ struct etna_inst_src src_y = { };
+
+ if (!etna_rgroup_is_uniform(src[0].rgroup)) {
+ src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));
+
+ struct etna_inst ins = { };
+ ins.opcode = INST_OPCODE_SELECT;
+ ins.cond = INST_CONDITION_LT;
+ ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
+ ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
+ ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
+ emit_inst(c, &ins);
+ } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
+ src_y = alloc_imm_f32(c, 0.0);
+ else
+ src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
+
+ struct etna_inst_src src_w = { };
+
+ if (!etna_rgroup_is_uniform(src[0].rgroup)) {
+ src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));
+
+ struct etna_inst ins = { };
+ ins.opcode = INST_OPCODE_SELECT;
+ ins.cond = INST_CONDITION_GT;
+ ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
+ ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
+ ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
+ emit_inst(c, &ins);
+ ins.cond = INST_CONDITION_LT;
+ ins.src[0].neg = !ins.src[0].neg;
+ ins.src[2].neg = !ins.src[2].neg;
+ ins.src[1] = src_w;
+ emit_inst(c, &ins);
+ } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
+ src_w = alloc_imm_f32(c, -128.);
+ else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
+ src_w = alloc_imm_f32(c, 128.);
+ else
+ src_w = swizzle(src[0], SWIZZLE(W, W, W, W));
+
+ if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_LOG,
+ .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
+ .src[2] = src_y,
+ .tex = { .amode=1 }, /* Unknown bit needs to be set */
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
+ .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
+ .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
+ });
+ } else {
+ struct etna_inst ins[3] = { };
+ ins[0].opcode = INST_OPCODE_LOG;
+ ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
+ ins[0].src[2] = src_y;
+
+ emit_inst(c, &ins[0]);
+ }
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = 0,
+ .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
+ .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
+ .src[1] = src_w,
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_LITP,
+ .sat = 0,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
+ .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
+ .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
+ });
+}
+
+static void
+trans_ssg(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ if (c->specs->has_sign_floor_ceil) {
+ emit_inst(c, &(struct etna_inst){
+ .opcode = INST_OPCODE_SIGN,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[2] = src[0],
+ });
+ } else {
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+ struct etna_inst ins[2] = { };
+
+ ins[0].opcode = INST_OPCODE_SET;
+ ins[0].cond = INST_CONDITION_NZ;
+ ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z | INST_COMPS_W);
+ ins[0].src[0] = src[0];
+
+ ins[1].opcode = INST_OPCODE_SELECT;
+ ins[1].cond = INST_CONDITION_LZ;
+ ins[1].sat = inst->Instruction.Saturate;
+ ins[1].dst = convert_dst(c, &inst->Dst[0]);
+ ins[1].src[0] = src[0];
+ ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
+ ins[1].src[1] = negate(ins[1].src[2]);
+
+ emit_inst(c, &ins[0]);
+ emit_inst(c, &ins[1]);
+ }
+}
+
+static void
+trans_trig(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
+ /* On newer chips alternative SIN/COS instructions are implemented,
+ * which:
+ * - Need their input scaled by 1/pi instead of 2/pi
+ * - Output an x and y component, which need to be multiplied to
+ * get the result
+ */
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_Z),
+ .src[0] = src[0], /* any swizzling happens here */
+ .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
+ ? INST_OPCODE_COS
+ : INST_OPCODE_SIN,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
+ .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
+ .tex = { .amode=1 }, /* Unknown bit needs to be set */
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
+ .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
+ });
+
+ } else if (c->specs->has_sin_cos_sqrt) {
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+ /* add divide by PI/2, using a temp register. GC2000
+ * fails with src==dst for the trig instruction. */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z | INST_COMPS_W),
+ .src[0] = src[0], /* any swizzling happens here */
+ .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
+ ? INST_OPCODE_COS
+ : INST_OPCODE_SIN,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
+ });
+ } else {
+ /* Implement Nick's fast sine/cosine. Taken from:
+ * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
+ * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
+ * MAD t.x_zw, src.xxxx, A, B
+ * FRC t.x_z_, void, void, t.xwzw
+ * MAD t.x_z_, t.xwzw, 2, -1
+ * MUL t._y__, t.wzww, |t.wzww|, void (for sin/scs)
+ * DP3 t.x_z_, t.zyww, C, void (for sin)
+ * DP3 t.__z_, t.zyww, C, void (for scs)
+ * MUL t._y__, t.wxww, |t.wxww|, void (for cos/scs)
+ * DP3 t.x_z_, t.xyww, C, void (for cos)
+ * DP3 t.x___, t.xyww, C, void (for scs)
+ * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
+ * MAD dst, t.ywyw, .2225, t.xzxz
+ */
+ struct etna_inst *p, ins[9] = { };
+ struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
+ struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
+ struct etna_inst_src sincos[3], in = src[0];
+ sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
+ sincos[1] = etna_imm_vec4f(c, sincos_const[1]);
+
+ /* A uniform source will cause the inner temp limit to
+ * be exceeded. Explicitly deal with that scenario.
+ */
+ if (etna_rgroup_is_uniform(src[0].rgroup)) {
+ struct etna_inst ins = { };
+ ins.opcode = INST_OPCODE_MOV;
+ ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
+ ins.src[2] = in;
+ emit_inst(c, &ins);
+ in = t0s;
+ }
+
+ ins[0].opcode = INST_OPCODE_MAD;
+ ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
+ ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
+ ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
+ ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */
+
+ ins[1].opcode = INST_OPCODE_FRC;
+ ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
+ ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));
+
+ ins[2].opcode = INST_OPCODE_MAD;
+ ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
+ ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
+ ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
+ ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */
+
+ unsigned mul_swiz, dp3_swiz;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
+ mul_swiz = SWIZZLE(W, Z, W, W);
+ dp3_swiz = SWIZZLE(Z, Y, W, W);
+ } else {
+ mul_swiz = SWIZZLE(W, X, W, W);
+ dp3_swiz = SWIZZLE(X, Y, W, W);
+ }
+
+ ins[3].opcode = INST_OPCODE_MUL;
+ ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
+ ins[3].src[0] = swizzle(t0s, mul_swiz);
+ ins[3].src[1] = absolute(ins[3].src[0]);
+
+ ins[4].opcode = INST_OPCODE_DP3;
+ ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
+ ins[4].src[0] = swizzle(t0s, dp3_swiz);
+ ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
+
+ p = &ins[5];
+ p->opcode = INST_OPCODE_MAD;
+ p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
+ p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
+ p->src[1] = absolute(p->src[0]);
+ p->src[2] = negate(p->src[0]);
+
+ p++;
+ p->opcode = INST_OPCODE_MAD;
+ p->sat = inst->Instruction.Saturate;
+ p->dst = convert_dst(c, &inst->Dst[0]),
+ p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
+ p->src[1] = alloc_imm_f32(c, 0.2225);
+ p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));
+
+ for (int i = 0; &ins[i] <= p; i++)
+ emit_inst(c, &ins[i]);
+ }
+}
+
+static void
+trans_lg2(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ if (c->specs->has_new_transcendentals) {
+ /* On newer chips alternative LOG instruction is implemented,
+ * which outputs an x and y component, which need to be multiplied to
+ * get the result.
+ */
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_LOG,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
+ .src[2] = src[0],
+ .tex = { .amode=1 }, /* Unknown bit needs to be set */
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
+ .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
+ });
+ } else {
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_LOG,
+ .sat = inst->Instruction.Saturate,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .src[2] = src[0],
+ });
+ }
+}
+
+static void
+trans_sampler(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst,
+ struct etna_inst_src *src)
+{
+ /* There is no native support for GL texture rectangle coordinates, so
+ * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
+ if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
+ uint32_t unit = inst->Src[1].Register.Index;
+ struct etna_inst ins[2] = { };
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+
+ ins[0].opcode = INST_OPCODE_MUL;
+ ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
+ ins[0].src[0] = src[0];
+ ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit);
+
+ ins[1].opcode = INST_OPCODE_MUL;
+ ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
+ ins[1].src[0] = src[0];
+ ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit);
+
+ emit_inst(c, &ins[0]);
+ emit_inst(c, &ins[1]);
+
+ src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_TEXLD,
+ .sat = 0,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
+ .src[0] = src[0],
+ });
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_TEXLDB,
+ .sat = 0,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
+ .src[0] = src[0],
+ });
+ break;
+
+ case TGSI_OPCODE_TXL:
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_TEXLDL,
+ .sat = 0,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
+ .src[0] = src[0],
+ });
+ break;
+
+ case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
+ struct etna_native_reg temp = etna_compile_get_inner_temp(c);
+
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_RCP,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
+ .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .sat = 0,
+ .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
+ INST_COMPS_Z), /* tmp.xyz */
+ .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
+ .src[1] = src[0], /* src.xyzw */
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_TEXLD,
+ .sat = 0,
+ .dst = convert_dst(c, &inst->Dst[0]),
+ .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
+ .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
+ });
+ } break;
+
+ default:
+ BUG("Unhandled instruction %s",
+ tgsi_get_opcode_name(inst->Instruction.Opcode));
+ assert(0);
+ break;
+ }
+}
+
+static void
+trans_dummy(const struct instr_translater *t, struct etna_compile *c,
+ const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
+{
+ /* nothing to do */
+}
+
+static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
+#define INSTR(n, f, ...) \
+ [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}
+
+ INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
+ INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
+ INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
+ INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
+ INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
+ INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
+ INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
+ INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
+ INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
+ INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
+ INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
+ INSTR(LG2, trans_lg2),
+ INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
+ INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
+ INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
+ INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
+ INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),
+
+ INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
+ INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),
+
+ INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
+ INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),
+
+ INSTR(IF, trans_if),
+ INSTR(ELSE, trans_else),
+ INSTR(ENDIF, trans_endif),
+
+ INSTR(BGNLOOP, trans_loop_bgn),
+ INSTR(ENDLOOP, trans_loop_end),
+ INSTR(BRK, trans_brk),
+ INSTR(CONT, trans_cont),
+
+ INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
+ INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),
+
+ INSTR(ARL, trans_arl),
+ INSTR(LRP, trans_lrp),
+ INSTR(LIT, trans_lit),
+ INSTR(SSG, trans_ssg),
+
+ INSTR(SIN, trans_trig),
+ INSTR(COS, trans_trig),
+
+ INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
+ INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
+ INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
+ INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
+ INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
+ INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),
+
+ INSTR(TEX, trans_sampler),
+ INSTR(TXB, trans_sampler),
+ INSTR(TXL, trans_sampler),
+ INSTR(TXP, trans_sampler),
+
+ INSTR(NOP, trans_dummy),
+ INSTR(END, trans_dummy),
+};
+
+/* Pass -- compile instructions */
+static void
+etna_compile_pass_generate_code(struct etna_compile *c)
+{
+ struct tgsi_parse_context ctx = { };
+ ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
+ assert(status == TGSI_PARSE_OK);
+
+ int inst_idx = 0;
+ while (!tgsi_parse_end_of_tokens(&ctx)) {
+ const struct tgsi_full_instruction *inst = 0;
+
+ /* No inner temps used yet for this instruction, clear counter */
+ c->inner_temps = 0;
+
+ tgsi_parse_token(&ctx);
+
+ switch (ctx.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ /* iterate over operands */
+ inst = &ctx.FullToken.FullInstruction;
+ if (c->dead_inst[inst_idx]) { /* skip dead instructions */
+ inst_idx++;
+ continue;
+ }
+
+ /* Lookup the TGSI information and generate the source arguments */
+ struct etna_inst_src src[ETNA_NUM_SRC];
+ memset(src, 0, sizeof(src));
+
+ const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);
+
+ for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
+ const struct tgsi_full_src_register *reg = &inst->Src[i];
+ const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native;
+
+ if (!n->valid || n->is_tex)
+ continue;
+
+ src[i] = etna_create_src(reg, n);
+ }
+
+ const unsigned opc = inst->Instruction.Opcode;
+ const struct instr_translater *t = &translaters[opc];
+
+ if (t->fxn) {
+ t->fxn(t, c, inst, src);
+
+ inst_idx += 1;
+ } else {
+ BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
+ assert(0);
+ }
+ break;
+ }
+ }
+ tgsi_parse_free(&ctx);
+}
+
+/* Look up register by semantic */
+static struct etna_reg_desc *
+find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
+{
+ for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[file].reg[idx];
+
+ if (reg->semantic.Name == name && reg->semantic.Index == index)
+ return reg;
+ }
+
+ return NULL; /* not found */
+}
+
+/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
+ * - this is a vertex shader
+ * - and this is an older GPU
+ */
+static void
+etna_compile_add_z_div_if_needed(struct etna_compile *c)
+{
+ if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
+ /* find position out */
+ struct etna_reg_desc *pos_reg =
+ find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
+
+ if (pos_reg != NULL) {
+ /*
+ * ADD tX.__z_, tX.zzzz, void, tX.wwww
+ * MUL tX.__z_, tX.zzzz, 0.5, void
+ */
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_ADD,
+ .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
+ .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
+ .src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
+ });
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MUL,
+ .dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
+ .src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
+ .src[1] = alloc_imm_f32(c, 0.5f),
+ });
+ }
+ }
+}
+
+static void
+etna_compile_frag_rb_swap(struct etna_compile *c)
+{
+ if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
+ /* find color out */
+ struct etna_reg_desc *color_reg =
+ find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
+
+ emit_inst(c, &(struct etna_inst) {
+ .opcode = INST_OPCODE_MOV,
+ .dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
+ .src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
+ });
+ }
+}
+
+/** add a NOP to the shader if
+ * a) the shader is empty
+ * or
+ * b) there is a label at the end of the shader
+ */
+static void
+etna_compile_add_nop_if_needed(struct etna_compile *c)
+{
+ bool label_at_last_inst = false;
+
+ for (int idx = 0; idx < c->labels_count; ++idx) {
+ if (c->labels[idx].inst_idx == c->inst_ptr)
+ label_at_last_inst = true;
+
+ }
+
+ if (c->inst_ptr == 0 || label_at_last_inst)
+ emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
+}
+
+static void
+assign_uniforms(struct etna_compile_file *file, unsigned base)
+{
+ for (int idx = 0; idx < file->reg_size; ++idx) {
+ file->reg[idx].native.valid = 1;
+ file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
+ file->reg[idx].native.id = base + idx;
+ }
+}
+
+/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
+ * CONST must be consecutive as const buffers are supposed to be consecutive,
+ * and before IMM, as this is
+ * more convenient because is possible for the compilation process itself to
+ * generate extra
+ * immediates for constants such as pi, one, zero.
+ */
+static void
+assign_constants_and_immediates(struct etna_compile *c)
+{
+ assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
+ /* immediates start after the constants */
+ c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
+ assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
+ DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
+ c->imm_size);
+}
+
+/* Assign declared samplers to native texture units */
+static void
+assign_texture_units(struct etna_compile *c)
+{
+ uint tex_base = 0;
+
+ if (c->info.processor == PIPE_SHADER_VERTEX)
+ tex_base = c->specs->vertex_sampler_offset;
+
+ for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
+ c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
+ c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
+ c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
+ }
+}
+
+/* Additional pass to fill in branch targets. This pass should be last
+ * as no instruction reordering or removing/addition can be done anymore
+ * once the branch targets are computed.
+ */
+static void
+etna_compile_fill_in_labels(struct etna_compile *c)
+{
+ for (int idx = 0; idx < c->inst_ptr; ++idx) {
+ if (c->lbl_usage[idx] != -1)
+ etna_assemble_set_imm(&c->code[idx * 4],
+ c->labels[c->lbl_usage[idx]].inst_idx);
+ }
+}
+
+/* compare two etna_native_reg structures, return true if equal */
+static bool
+cmp_etna_native_reg(const struct etna_native_reg to,
+ const struct etna_native_reg from)
+{
+ return to.valid == from.valid && to.is_tex == from.is_tex &&
+ to.rgroup == from.rgroup && to.id == from.id;
+}
+
+/* go through all declarations and swap native registers *to* and *from* */
+static void
+swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
+ const struct etna_native_reg from)
+{
+ if (cmp_etna_native_reg(from, to))
+ return; /* Nothing to do */
+
+ for (int idx = 0; idx < c->total_decls; ++idx) {
+ if (cmp_etna_native_reg(c->decl[idx].native, from)) {
+ c->decl[idx].native = to;
+ } else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
+ c->decl[idx].native = from;
+ }
+ }
+}
+
+/* For PS we need to permute so that inputs are always in temporary 0..N-1.
+ * Semantic POS is always t0. If that semantic is not used, avoid t0.
+ */
+static void
+permute_ps_inputs(struct etna_compile *c)
+{
+ /* Special inputs:
+ * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
+ * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
+ */
+ uint native_idx = 1;
+
+ for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
+ uint input_id;
+ assert(reg->has_semantic);
+
+ if (!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
+ continue;
+
+ input_id = native_idx++;
+ swap_native_registers(c, etna_native_temp(input_id),
+ c->file[TGSI_FILE_INPUT].reg[idx].native);
+ }
+
+ c->num_varyings = native_idx - 1;
+
+ if (native_idx > c->next_free_native)
+ c->next_free_native = native_idx;
+}
+
+/* fill in ps inputs into shader object */
+static void
+fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
+{
+ struct etna_shader_io_file *sf = &sobj->infile;
+
+ sf->num_reg = 0;
+
+ for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
+
+ if (reg->native.id > 0) {
+ assert(sf->num_reg < ETNA_NUM_INPUTS);
+ sf->reg[sf->num_reg].reg = reg->native.id;
+ sf->reg[sf->num_reg].semantic = reg->semantic;
+ /* convert usage mask to number of components (*=wildcard)
+ * .r (0..1) -> 1 component
+ * .*g (2..3) -> 2 component
+ * .**b (4..7) -> 3 components
+ * .***a (8..15) -> 4 components
+ */
+ sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
+ sf->num_reg++;
+ }
+ }
+
+ assert(sf->num_reg == c->num_varyings);
+ sobj->input_count_unk8 = 31; /* XXX what is this */
+}
+
+/* fill in output mapping for ps into shader object */
+static void
+fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
+{
+ sobj->outfile.num_reg = 0;
+
+ for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
+
+ switch (reg->semantic.Name) {
+ case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
+ sobj->ps_color_out_reg = reg->native.id;
+ break;
+ case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
+ sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
+ break;
+ default:
+ assert(0); /* only outputs supported are COLOR and POSITION at the moment */
+ }
+ }
+}
+
+/* fill in inputs for vs into shader object */
+static void
+fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
+{
+ struct etna_shader_io_file *sf = &sobj->infile;
+
+ sf->num_reg = 0;
+ for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
+ assert(sf->num_reg < ETNA_NUM_INPUTS);
+
+ if (!reg->native.valid)
+ continue;
+
+ /* XXX exclude inputs with special semantics such as gl_frontFacing */
+ sf->reg[sf->num_reg].reg = reg->native.id;
+ sf->reg[sf->num_reg].semantic = reg->semantic;
+ sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
+ sf->num_reg++;
+ }
+
+ sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
+}
+
+/* build two-level output index [Semantic][Index] for fast linking */
+static void
+build_output_index(struct etna_shader_variant *sobj)
+{
+ int total = 0;
+ int offset = 0;
+
+ for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name)
+ total += sobj->output_count_per_semantic[name];
+
+ sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
+
+ for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) {
+ sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
+ offset += sobj->output_count_per_semantic[name];
+ }
+
+ for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) {
+ sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name]
+ [sobj->outfile.reg[idx].semantic.Index] =
+ &sobj->outfile.reg[idx];
+ }
+}
+
+/* fill in outputs for vs into shader object */
+static void
+fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
+{
+ struct etna_shader_io_file *sf = &sobj->outfile;
+
+ sf->num_reg = 0;
+ for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
+ struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
+ assert(sf->num_reg < ETNA_NUM_INPUTS);
+
+ switch (reg->semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ sobj->vs_pos_out_reg = reg->native.id;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ sobj->vs_pointsize_out_reg = reg->native.id;
+ break;
+ default:
+ sf->reg[sf->num_reg].reg = reg->native.id;
+ sf->reg[sf->num_reg].semantic = reg->semantic;
+ sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
+ sf->num_reg++;
+ sobj->output_count_per_semantic[reg->semantic.Name] =
+ MAX2(reg->semantic.Index + 1,
+ sobj->output_count_per_semantic[reg->semantic.Name]);
+ }
+ }
+
+ /* build two-level index for linking */
+ build_output_index(sobj);
+
+ /* fill in "mystery meat" load balancing value. This value determines how
+ * work is scheduled between VS and PS
+ * in the unified shader architecture. More precisely, it is determined from
+ * the number of VS outputs, as well as chip-specific
+ * vertex output buffer size, vertex cache size, and the number of shader
+ * cores.
+ *
+ * XXX this is a conservative estimate, the "optimal" value is only known for
+ * sure at link time because some
+ * outputs may be unused and thus unmapped. Then again, in the general use
+ * case with GLSL the vertex and fragment
+ * shaders are linked already before submitting to Gallium, thus all outputs
+ * are used.
+ */
+ int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
+ assert(half_out);
+
+ uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
+ 2 * half_out * c->specs->vertex_cache_size)) +
+ 9) /
+ 10;
+ uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
+ sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
+ VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
+ VIVS_VS_LOAD_BALANCING_C(0x3f) |
+ VIVS_VS_LOAD_BALANCING_D(0x0f);
+}
+
+static bool
+etna_compile_check_limits(struct etna_compile *c)
+{
+ int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
+ ? c->specs->max_vs_uniforms
+ : c->specs->max_ps_uniforms;
+ /* round up number of uniforms, including immediates, in units of four */
+ int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
+
+ if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
+ DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
+ c->specs->max_instructions);
+ return false;
+ }
+
+ if (c->next_free_native > c->specs->max_registers) {
+ DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
+ c->specs->max_registers);
+ return false;
+ }
+
+ if (num_uniforms > max_uniforms) {
+ DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
+ max_uniforms);
+ return false;
+ }
+
+ if (c->num_varyings > c->specs->max_varyings) {
+ DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
+ c->specs->max_varyings);
+ return false;
+ }
+
+ if (c->imm_base > c->specs->num_constants) {
+ DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
+ c->specs->num_constants);
+ }
+
+ return true;
+}
+
+static void
+copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
+{
+ uint32_t count = c->imm_base + c->imm_size;
+ struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
+
+ uinfo->imm_count = count;
+
+ uinfo->imm_data = malloc(count * sizeof(*c->imm_data));
+ for (unsigned i = 0; i < c->imm_base; i++)
+ uinfo->imm_data[i] = i;
+ memcpy(&uinfo->imm_data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));
+
+ uinfo->imm_contents = malloc(count * sizeof(*c->imm_contents));
+ for (unsigned i = 0; i < c->imm_base; i++)
+ uinfo->imm_contents[i] = ETNA_IMMEDIATE_UNIFORM;
+ memcpy(&uinfo->imm_contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));
+
+ etna_set_shader_uniforms_dirty_flags(sobj);
+}
+
+bool
+etna_compile_shader(struct etna_shader_variant *v)
+{
+ if (DBG_ENABLED(ETNA_DBG_NIR))
+ return etna_compile_shader_nir(v);
+
+ /* Create scratch space that may be too large to fit on stack
+ */
+ bool ret;
+ struct etna_compile *c;
+
+ if (unlikely(!v))
+ return false;
+
+ const struct etna_specs *specs = v->shader->specs;
+
+ struct tgsi_lowering_config lconfig = {
+ .lower_FLR = !specs->has_sign_floor_ceil,
+ .lower_CEIL = !specs->has_sign_floor_ceil,
+ .lower_POW = true,
+ .lower_EXP = true,
+ .lower_LOG = true,
+ .lower_DP2 = !specs->has_halti2_instructions,
+ .lower_TRUNC = true,
+ };
+
+ c = CALLOC_STRUCT(etna_compile);
+ if (!c)
+ return false;
+
+ memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
+
+ const struct tgsi_token *tokens = v->shader->tokens;
+
+ c->specs = specs;
+ c->key = &v->key;
+ c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
+ c->free_tokens = !!c->tokens;
+ if (!c->tokens) {
+ /* no lowering */
+ c->tokens = tokens;
+ }
+
+ /* Build a map from gallium register to native registers for files
+ * CONST, SAMP, IMM, OUT, IN, TEMP.
+ * SAMP will map as-is for fragment shaders, there will be a +8 offset for
+ * vertex shaders.
+ */
+ /* Pass one -- check register file declarations and immediates */
+ etna_compile_parse_declarations(c);
+
+ etna_allocate_decls(c);
+
+ /* Pass two -- check usage of temporaries, inputs, outputs */
+ etna_compile_pass_check_usage(c);
+
+ assign_special_inputs(c);
+
+ /* Assign native temp register to TEMPs */
+ assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);
+
+ /* optimize outputs */
+ etna_compile_pass_optimize_outputs(c);
+
+ /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
+ * this is part of RGROUP_INTERNAL
+ */
+
+ /* assign inputs: last usage of input should be <= first usage of temp */
+ /* potential optimization case:
+ * if single MOV TEMP[y], IN[x] before which temp y is not used, and
+ * after which IN[x]
+ * is not read, temp[y] can be used as input register as-is
+ */
+ /* sort temporaries by first use
+ * sort inputs by last usage
+ * iterate over inputs, temporaries
+ * if last usage of input <= first usage of temp:
+ * assign input to temp
+ * advance input, temporary pointer
+ * else
+ * advance temporary pointer
+ *
+ * potential problem: instruction with multiple inputs of which one is the
+ * temp and the other is the input;
+ * however, as the temp is not used before this, how would this make
+ * sense? uninitialized temporaries have an undefined
+ * value, so this would be ok
+ */
+ assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);
+
+ /* assign outputs: first usage of output should be >= last usage of temp */
+ /* potential optimization case:
+ * if single MOV OUT[x], TEMP[y] (with full write mask, or at least
+ * writing all components that are used in
+ * the shader) after which temp y is no longer used temp[y] can be
+ * used as output register as-is
+ *
+ * potential problem: instruction with multiple outputs of which one is the
+ * temp and the other is the output;
+ * however, as the temp is not used after this, how would this make
+ * sense? could just discard the output value
+ */
+ /* sort temporaries by last use
+ * sort outputs by first usage
+ * iterate over outputs, temporaries
+ * if first usage of output >= last usage of temp:
+ * assign output to temp
+ * advance output, temporary pointer
+ * else
+ * advance temporary pointer
+ */
+ assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);
+
+ assign_constants_and_immediates(c);
+ assign_texture_units(c);
+
+ /* list declarations */
+ for (int x = 0; x < c->total_decls; ++x) {
+ DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
+ "last_use=%i native=%i usage_mask=%x "
+ "has_semantic=%i",
+ x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
+ c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
+ c->decl[x].native.valid ? c->decl[x].native.id : -1,
+ c->decl[x].usage_mask, c->decl[x].has_semantic);
+ if (c->decl[x].has_semantic)
+ DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
+ tgsi_semantic_names[c->decl[x].semantic.Name],
+ c->decl[x].semantic.Index);
+ }
+ /* XXX for PS we need to permute so that inputs are always in temporary
+ * 0..N-1.
+ * There is no "switchboard" for varyings (AFAIK!). The output color,
+ * however, can be routed
+ * from an arbitrary temporary.
+ */
+ if (c->info.processor == PIPE_SHADER_FRAGMENT)
+ permute_ps_inputs(c);
+
+
+ /* list declarations */
+ for (int x = 0; x < c->total_decls; ++x) {
+ DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
+ "last_use=%i native=%i usage_mask=%x "
+ "has_semantic=%i",
+ x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
+ c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
+ c->decl[x].native.valid ? c->decl[x].native.id : -1,
+ c->decl[x].usage_mask, c->decl[x].has_semantic);
+ if (c->decl[x].has_semantic)
+ DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
+ tgsi_semantic_names[c->decl[x].semantic.Name],
+ c->decl[x].semantic.Index);
+ }
+
+ /* pass 3: generate instructions */
+ etna_compile_pass_generate_code(c);
+ etna_compile_add_z_div_if_needed(c);
+ etna_compile_frag_rb_swap(c);
+ etna_compile_add_nop_if_needed(c);
+
+ ret = etna_compile_check_limits(c);
+ if (!ret)
+ goto out;
+
+ etna_compile_fill_in_labels(c);
+
+ /* fill in output structure */
+ v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
+ v->code_size = c->inst_ptr * 4;
+ v->code = mem_dup(c->code, c->inst_ptr * 16);
+ v->num_loops = c->num_loops;
+ v->num_temps = c->next_free_native;
+ v->vs_id_in_reg = -1;
+ v->vs_pos_out_reg = -1;
+ v->vs_pointsize_out_reg = -1;
+ v->ps_color_out_reg = -1;
+ v->ps_depth_out_reg = -1;
+ v->needs_icache = c->inst_ptr > c->specs->max_instructions;
+ copy_uniform_state_to_shader(c, v);
+
+ if (c->info.processor == PIPE_SHADER_VERTEX) {
+ fill_in_vs_inputs(v, c);
+ fill_in_vs_outputs(v, c);
+ } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
+ fill_in_ps_inputs(v, c);
+ fill_in_ps_outputs(v, c);
+ }
+
+out:
+ if (c->free_tokens)
+ FREE((void *)c->tokens);
+
+ FREE(c->labels);
+ FREE(c);
+
+ return ret;
+}
+
+extern const char *tgsi_swizzle_names[];
+void
+etna_dump_shader(const struct etna_shader_variant *shader)
+{
+ if (shader->stage == MESA_SHADER_VERTEX)
+ printf("VERT\n");
+ else
+ printf("FRAG\n");
+
+
+ etna_disasm(shader->code, shader->code_size, PRINT_RAW);
+
+ printf("num loops: %i\n", shader->num_loops);
+ printf("num temps: %i\n", shader->num_temps);
+ printf("immediates:\n");
+ for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) {
+ printf(" [%i].%s = %f (0x%08x) (%d)\n",
+ idx / 4,
+ tgsi_swizzle_names[idx % 4],
+ *((float *)&shader->uniforms.imm_data[idx]),
+ shader->uniforms.imm_data[idx],
+ shader->uniforms.imm_contents[idx]);
+ }
+
+ if (DBG_ENABLED(ETNA_DBG_NIR)) {
+ printf("inputs:\n");
+ for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
+ printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg,
+ (shader->stage == MESA_SHADER_VERTEX) ?
+ gl_vert_attrib_name(shader->infile.reg[idx].slot) :
+ gl_varying_slot_name(shader->infile.reg[idx].slot),
+ shader->infile.reg[idx].num_components);
+ }
+ printf("outputs:\n");
+ for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
+ printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg,
+ (shader->stage == MESA_SHADER_VERTEX) ?
+ gl_varying_slot_name(shader->outfile.reg[idx].slot) :
+ gl_frag_result_name(shader->outfile.reg[idx].slot),
+ shader->outfile.reg[idx].num_components);
+ }
+ } else {
+ printf("inputs:\n");
+ for (int idx = 0; idx < shader->infile.num_reg; ++idx) {
+ printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg,
+ tgsi_semantic_names[shader->infile.reg[idx].semantic.Name],
+ shader->infile.reg[idx].semantic.Index,
+ shader->infile.reg[idx].num_components);
+ }
+ printf("outputs:\n");
+ for (int idx = 0; idx < shader->outfile.num_reg; ++idx) {
+ printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg,
+ tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name],
+ shader->outfile.reg[idx].semantic.Index,
+ shader->outfile.reg[idx].num_components);
+ }
+ }
+ printf("special:\n");
+ if (shader->stage == MESA_SHADER_VERTEX) {
+ printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg);
+ printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg);
+ printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing);
+ } else {
+ printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg);
+ printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg);
+ }
+ printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8);
+}
+
+void
+etna_destroy_shader(struct etna_shader_variant *shader)
+{
+ if (DBG_ENABLED(ETNA_DBG_NIR))
+ return etna_destroy_shader_nir(shader);
+
+ assert(shader);
+
+ FREE(shader->code);
+ FREE(shader->uniforms.imm_data);
+ FREE(shader->uniforms.imm_contents);
+ FREE(shader->output_per_semantic_list);
+ FREE(shader);
+}
+
+static const struct etna_shader_inout *
+etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
+ const struct etna_shader_inout *in)
+{
+ if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name])
+ return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index];
+
+ return NULL;
+}
+
+bool
+etna_link_shader(struct etna_shader_link_info *info,
+ const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
+{
+ if (DBG_ENABLED(ETNA_DBG_NIR))
+ return etna_link_shader_nir(info, vs, fs);
+
+ int comp_ofs = 0;
+ /* For each fragment input we need to find the associated vertex shader
+ * output, which can be found by matching on semantic name and index. A
+ * binary search could be used because the vs outputs are sorted by their
+ * semantic index and grouped by semantic type by fill_in_vs_outputs.
+ */
+ assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
+ info->pcoord_varying_comp_ofs = -1;
+
+ for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
+ const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
+ const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
+ struct etna_varying *varying;
+ bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;
+
+ assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
+
+ if (fsio->reg > info->num_varyings)
+ info->num_varyings = fsio->reg;
+
+ varying = &info->varyings[fsio->reg - 1];
+ varying->num_components = fsio->num_components;
+
+ if (!interpolate_always) /* colors affected by flat shading */
+ varying->pa_attributes = 0x200;
+ else /* texture coord or other bypasses flat shading */
+ varying->pa_attributes = 0x2f1;
+
+ varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
+ varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
+ varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
+ varying->use[3] = VARYING_COMPONENT_USE_UNUSED;
+
+ /* point coord is an input to the PS without matching VS output,
+ * so it gets a varying slot without being assigned a VS register.
+ */
+ if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
+ varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
+ varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
+
+ info->pcoord_varying_comp_ofs = comp_ofs;
+ } else {
+ if (vsio == NULL) { /* not found -- link error */
+ BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
+ return true;
+ }
+
+ varying->reg = vsio->reg;
+ }
+
+ comp_ofs += varying->num_components;
+ }
+
+ assert(info->num_varyings == fs->infile.num_reg);
+
+ return false;
+}
'etnaviv_blt.h',
'etnaviv_clear_blit.c',
'etnaviv_clear_blit.h',
- 'etnaviv_compiler.c',
'etnaviv_compiler.h',
'etnaviv_compiler_nir.c',
'etnaviv_compiler_nir_emit.h',
+ 'etnaviv_compiler_tgsi.c',
'etnaviv_context.c',
'etnaviv_context.h',
'etnaviv_debug.h',