From 3af1f3b9220733f5e3a76fe38fbc397974678234 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 21 Aug 2007 16:24:38 -0600 Subject: [PATCH] Initial check-in of i915 fragment program translation (from tgsi). --- src/mesa/pipe/i915simple/i915_fpc.c | 183 ++++ src/mesa/pipe/i915simple/i915_fpc.h | 339 +++++++ src/mesa/pipe/i915simple/i915_fpc_debug.c | 346 ++++++++ src/mesa/pipe/i915simple/i915_fpc_emit.c | 430 +++++++++ src/mesa/pipe/i915simple/i915_fpc_translate.c | 838 ++++++++++++++++++ 5 files changed, 2136 insertions(+) create mode 100644 src/mesa/pipe/i915simple/i915_fpc.c create mode 100644 src/mesa/pipe/i915simple/i915_fpc.h create mode 100644 src/mesa/pipe/i915simple/i915_fpc_debug.c create mode 100644 src/mesa/pipe/i915simple/i915_fpc_emit.c create mode 100644 src/mesa/pipe/i915simple/i915_fpc_translate.c diff --git a/src/mesa/pipe/i915simple/i915_fpc.c b/src/mesa/pipe/i915simple/i915_fpc.c new file mode 100644 index 00000000000..fd0bbbc4824 --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if 0
+#include
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#endif
+
+#include "i915_fpc.h"
+
+
+
+/**
+ * Report a translation failure.
+ *
+ * Writes \p msg to stderr behind an "i915_program_error: " prefix and
+ * sets the error flag on the fragment program being compiled.  No
+ * newline is appended, so callers that want one must include it in
+ * \p msg.
+ *
+ * \param p    compile context whose fragment program is flagged
+ * \param msg  text to print
+ */
+void
+i915_program_error(struct i915_fp_compile *p, const char *msg)
+{
+   fprintf(stderr, "i915_program_error: %s", msg);
+   p->fp->error = 1;
+}
+
+
+/**
+ * Allocate a compile context for \p fp and reset its bookkeeping.
+ *
+ * \param i915  context supplying the constant buffer (i915->fs.constants)
+ * \param fp    fragment program that will receive the compiled result
+ * \return the new compile context
+ *
+ * NOTE(review): the CALLOC_STRUCT result is dereferenced without a NULL
+ * check.
+ * NOTE(review): p->shader is not assigned here, yet find_wpos_space()
+ * reads p->shader->inputs_read - confirm where it is meant to be set.
+ */
+static struct i915_fp_compile *
+i915_init_compile(struct i915_context *i915, struct i915_fragment_program *fp)
+{
+   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+
+   p->fp = fp;
+#if 0
+   p->env_param = NULL; /*i915->intel.ctx.FragmentProgram.Parameters;*/
+#endif
+   p->constants = i915->fs.constants;
+   p->nr_tex_indirect = 1;      /* correct? */
+   p->nr_tex_insn = 0;
+   p->nr_alu_insn = 0;
+   p->nr_decl_insn = 0;
+
+   memset(p->constant_flags, 0, sizeof(p->constant_flags));
+
+   /* Both cursors start at the beginning of their scratch buffers. */
+   p->csr = p->program;
+   p->decl = p->declarations;
+   p->decl_s = 0;
+   p->decl_t = 0;
+   /* Register allocation masks; presumably a set bit marks a register
+    * as unavailable (i915_get_utemp() searches ~utemp_flag, so ~0x7
+    * leaves only the first three U registers free).
+    */
+   p->temp_flag = 0xffff000;
+   p->utemp_flag = ~0x7;
+
+   /* NOTE(review): with this block disabled, fp->error is not cleared
+    * here; a stale error flag from an earlier compile would persist.
+    */
+#if 0
+   p->fp->translated = 0;
+   p->fp->error = 0;
+   p->fp->nr_constants = 0;
+#endif
+   p->fp->wpos_tex = -1;
+   p->fp->nr_params = 0;
+
+   /* First declaration dword is the packet header; i915_fini_compile()
+    * ORs the length into it later.
+    */
+   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+
+   return p;
+}
+
+/* Copy compile results to the fragment program struct and destroy the
+ * compilation context.
+ *
+ * The native instruction counts and the total program size are checked
+ * against the hardware limits first; any violation is reported through
+ * i915_program_error().  When the program is in error only the
+ * NumNative* counters are reset and nothing is copied.  The compilation
+ * context is released on every path, including the error path.
+ */
+static void
+i915_fini_compile(struct i915_fp_compile *p)
+{
+   uint program_size = p->csr - p->program;
+   uint decl_size = p->decl - p->declarations;
+
+   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
+      i915_program_error(p, "Exceeded max nr indirect texture lookups");
+
+   if (p->nr_tex_insn > I915_MAX_TEX_INSN)
+      i915_program_error(p, "Exceeded max TEX instructions");
+
+   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+      i915_program_error(p, "Exceeded max ALU instructions");
+
+   if (p->nr_decl_insn > I915_MAX_DECL_INSN)
+      i915_program_error(p, "Exceeded max DECL instructions");
+
+   /* fp->program holds I915_PROGRAM_SIZE dwords; declarations and
+    * instructions are copied into it back to back, so their sum must
+    * fit or the memcpy()s below would overrun the array.
+    */
+   if (program_size + decl_size > I915_PROGRAM_SIZE)
+      i915_program_error(p, "Exceeded max program size");
+
+   if (p->fp->error) {
+      p->fp->NumNativeInstructions = 0;
+      p->fp->NumNativeAluInstructions = 0;
+      p->fp->NumNativeTexInstructions = 0;
+      p->fp->NumNativeTexIndirections = 0;
+   }
+   else {
+      p->fp->NumNativeInstructions = (p->nr_alu_insn +
+                                      p->nr_tex_insn +
+                                      p->nr_decl_insn);
+      p->fp->NumNativeAluInstructions = p->nr_alu_insn;
+      p->fp->NumNativeTexInstructions = p->nr_tex_insn;
+      p->fp->NumNativeTexIndirections = p->nr_tex_indirect;
+
+      /* Patch the packet length into the header dword emitted by
+       * i915_init_compile().
+       */
+      p->declarations[0] |= program_size + decl_size - 2;
+
+      /* Copy compilation results to fragment program struct:
+       * declarations first, instructions right behind them.
+       */
+      memcpy(p->fp->program,
+             p->declarations,
+             decl_size * sizeof(uint));
+
+      memcpy(p->fp->program + decl_size,
+             p->program,
+             program_size * sizeof(uint));
+
+      p->fp->program_size = program_size + decl_size;
+   }
+
+   /* Release the compilation struct (previously leaked when the
+    * program was in error):
+    */
+   free(p);
+}
+
+
+/**
+ * Find an unused texture coordinate slot to use for fragment WPOS.
+ * Update p->fp->wpos_tex with the result (-1 if no unused texcoord slot is found).
+ */
+static void
+find_wpos_space(struct i915_fp_compile *p)
+{
+   const uint inputs_read = p->shader->inputs_read;
+   uint unit;
+
+   p->fp->wpos_tex = -1;
+
+   /* Nothing to reserve when the program never reads WPOS. */
+   if (!(inputs_read & FRAG_BIT_WPOS))
+      return;
+
+   /* Take the first texcoord the program does not read itself. */
+   for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+      const uint texcoord_bit = FRAG_BIT_TEX0 << unit;
+
+      if (!(inputs_read & texcoord_bit)) {
+         p->fp->wpos_tex = unit;
+         return;
+      }
+   }
+
+   i915_program_error(p, "No free texcoord for wpos value");
+}
+
+
+
+/**
+ * Translate the TGSI tokens bound in i915->fs into an i915 hardware
+ * fragment program stored in \p fp.
+ */
+void i915_compile_fragment_program( struct i915_context *i915,
+                                    struct i915_fragment_program *fp )
+{
+   struct i915_fp_compile *compile = i915_init_compile(i915, fp);
+   struct tgsi_token *program_tokens = i915->fs.tokens;
+
+   /* Pick the texcoord slot that will carry WPOS, if it is read. */
+   find_wpos_space(compile);
+
+   i915_translate_program(compile, program_tokens);
+   i915_fixup_depth_write(compile);
+
+   /* Publish the result into fp. */
+   i915_fini_compile(compile);
+#if 0
+   fp->translated = 1;
+#endif
+}
diff --git a/src/mesa/pipe/i915simple/i915_fpc.h b/src/mesa/pipe/i915simple/i915_fpc.h
new file mode 100644
index 00000000000..0a8bffcd9ae
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc.h
@@ -0,0 +1,339 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + +#include "pipe/p_util.h" + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 +#define I915_MAX_CONSTANT 32 + +#define MAX_VARYING 8 + +enum +{ + FRAG_ATTRIB_WPOS = 0, + FRAG_ATTRIB_COL0 = 1, + FRAG_ATTRIB_COL1 = 2, + FRAG_ATTRIB_FOGC = 3, + FRAG_ATTRIB_TEX0 = 4, + FRAG_ATTRIB_TEX1 = 5, + FRAG_ATTRIB_TEX2 = 6, + FRAG_ATTRIB_TEX3 = 7, + FRAG_ATTRIB_TEX4 = 8, + FRAG_ATTRIB_TEX5 = 9, + FRAG_ATTRIB_TEX6 = 10, + FRAG_ATTRIB_TEX7 = 11, + FRAG_ATTRIB_VAR0 = 12, /**< shader varying */ + FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING) +}; + +/** + * Bitflags for fragment program input attributes. + */ +/*@{*/ +#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS) +#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0) +#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1) +#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC) +#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0) +#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1) +#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2) +#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3) +#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4) +#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5) +#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6) +#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7) +#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0) + +#define MAX_DRAW_BUFFERS 4 + +enum +{ + FRAG_RESULT_COLR = 0, + FRAG_RESULT_COLH = 1, + FRAG_RESULT_DEPR = 2, + FRAG_RESULT_DATA0 = 3, + FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +}; + + + +#if 1 /*XXX temp */ +/* Hardware version of a parsed fragment program. "Derived" from the + * mesa fragment_program struct. 
+ */ +struct i915_fragment_program +{ +#if 0 + struct gl_fragment_program Base; +#else + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +#endif + + boolean error; /**< Set if i915_program_error() is called */ +#if 0 + uint id; /**< String id */ + boolean translated; +#endif + + /* Decls + instructions: + */ + uint program[I915_PROGRAM_SIZE]; + uint program_size; + +#if 0 + /* Constant buffer: + */ + float constant[I915_MAX_CONSTANT][4]; + uint nr_constants; +#endif + + /* Some of which are parameters: + */ + struct + { + uint reg; /* Hardware constant idx */ + const float *values; /* Pointer to tracked values */ + } param[I915_MAX_CONSTANT]; + uint nr_params; + +#if 0 + uint param_state; +#endif + uint wpos_tex; +}; +#endif + + +/*********************************************************************** + * Public interface for the compiler + */ + +void i915_compile_fragment_program( struct i915_context *i915, + struct i915_fragment_program *fp ); + + +/*********************************************************************** + * Private details of the compiler + */ + +struct i915_fp_compile { + struct i915_fragment_program *fp; + + struct pipe_shader_state *shader; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint constant_flags[I915_MAX_CONSTANT]; + + struct pipe_constant_buffer *constants; + + uint *csr; /* Cursor, points into program. + */ + + uint *decl; /* Cursor, points into declarations. + */ + + uint decl_s; /* flags for which s regs need to be decl'd */ + uint decl_t; /* flags for which t regs need to be decl'd */ + + uint temp_flag; /* Tracks temporary regs which are in + * use. + */ + + uint utemp_flag; /* Tracks TYPE_U temporary regs which are in + * use. 
+                                 */
+
+   uint nr_tex_indirect;
+   uint nr_tex_insn;
+   uint nr_alu_insn;
+   uint nr_decl_insn;
+
+#if 0
+   float (*env_param)[4];
+#endif
+};
+
+
+/* Having zero and one in here makes the definition of swizzle a lot
+ * easier.
+ *
+ * A ureg carries six 4-bit channel slots: X, Y, Z, W, then ZERO and
+ * ONE in the low byte.  Slot k sits at bit (20 - 4 * k).
+ */
+#define UREG_TYPE_SHIFT               29
+#define UREG_NR_SHIFT                 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT   23
+#define UREG_CHANNEL_X_SHIFT          20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT   19
+#define UREG_CHANNEL_Y_SHIFT          16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT   15
+#define UREG_CHANNEL_Z_SHIFT          12
+#define UREG_CHANNEL_W_NEGATE_SHIFT   11
+#define UREG_CHANNEL_W_SHIFT          8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
+#define UREG_CHANNEL_ZERO_SHIFT       4
+#define UREG_CHANNEL_ONE_NEGATE_MBZ   1
+#define UREG_CHANNEL_ONE_SHIFT        0
+
+#define UREG_BAD          0xffffffff    /* not a valid ureg */
+
+#define X    SRC_X
+#define Y    SRC_Y
+#define Z    SRC_Z
+#define W    SRC_W
+#define ZERO SRC_ZERO
+#define ONE  SRC_ONE
+
+/* Construct a ureg:
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\
+			  ((nr)  << UREG_NR_SHIFT) |		\
+			  (X     << UREG_CHANNEL_X_SHIFT) |	\
+			  (Y     << UREG_CHANNEL_Y_SHIFT) |	\
+			  (Z     << UREG_CHANNEL_Z_SHIFT) |	\
+			  (W     << UREG_CHANNEL_W_SHIFT) |	\
+			  (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |	\
+			  (ONE   << UREG_CHANNEL_ONE_SHIFT))
+
+/* GET_CHANNEL_SRC moves slot 'channel' of 'reg' up into the X slot
+ * position; CHANNEL_SRC moves an X-positioned slot down to 'channel'.
+ * Arguments are parenthesized so expressions can be passed safely.
+ */
+#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & (0xf<<20))
+#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4))
+
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+
+/* One neat thing about the UREG representation:
+ *
+ * Re-route the X/Y/Z/W channels of 'reg'.  Each of x, y, z, w names the
+ * slot the corresponding output channel should take its selector (and
+ * negate bit) from: X..W pick one of the register's current channel
+ * slots, ZERO and ONE pick the constant slots kept in the low byte.
+ * Type, number and the ZERO/ONE slots of 'reg' are left untouched.
+ *
+ * The asserts used to reject anything >= 4, which made valid calls such
+ * as swizzle(reg, ZERO, ZERO, ZERO, ZERO) abort in debug builds.
+ * Assumes SRC_ONE is the largest selector, matching the last slot of
+ * the layout above - confirm against i915_reg.h.
+ */
+static INLINE int
+swizzle(int reg, int x, int y, int z, int w)
+{
+   assert(x >= 0 && x <= SRC_ONE);
+   assert(y >= 0 && y <= SRC_ONE);
+   assert(z >= 0 && z <= SRC_ONE);
+   assert(w >= 0 && w <= SRC_ONE);
+   return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
+}
+
+/* Another neat thing about the UREG representation:
+ *
+ * Every channel has its own negate bit, so negation is a single XOR.
+ * Only bit 0 of each of x, y, z, w is used; a set bit toggles the
+ * negate flag of that channel in 'reg', a clear bit leaves it alone.
+ */
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
+{
+   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
+                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
+                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
+                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
+}
+
+
+/*======================================================================
+ * i915_fpc_emit.c
+ */
+extern uint i915_get_temp(struct i915_fp_compile *p);
+extern uint i915_get_utemp(struct i915_fp_compile *p);
+extern void i915_release_utemps(struct i915_fp_compile *p);
+
+
+extern uint i915_emit_texld(struct i915_fp_compile *p,
+                            uint dest,
+                            uint destmask,
+                            uint sampler, uint coord, uint op);
+
+extern uint i915_emit_arith(struct i915_fp_compile *p,
+                            uint op,
+                            uint dest,
+                            uint mask,
+                            uint saturate,
+                            uint src0, uint src1, uint src2);
+
+extern uint i915_emit_decl(struct i915_fp_compile *p,
+                           uint type, uint nr, uint d0_flags);
+
+
+extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0);
+
+extern uint i915_emit_const2f(struct i915_fp_compile *p,
+                              float c0, float c1);
+
+extern uint i915_emit_const4fv(struct i915_fp_compile *p,
+                               const float * c);
+
+extern uint i915_emit_const4f(struct i915_fp_compile *p,
+                              float c0, float c1,
+                              float c2, float c3);
+
+
+#if 0
+extern uint i915_emit_param4fv(struct i915_fp_compile *p,
+                               const float * values);
+#endif
+
+
+
+/*======================================================================
+ * i915_fpc.c
+ */
+extern void i915_program_error(struct i915_fp_compile *p,
+                               const char *msg);
+
+
+/*======================================================================
+ * i915_fpc_debug.c
+ */
+extern void i915_disassemble_program(const uint * program, uint sz);
+
+#if 0
+extern void i915_print_mesa_instructions( const struct prog_instruction *insn,
+                                          uint nr );
+#endif
+
+/*======================================================================
+ * i915_fpc_translate.c
+ */
+void i915_fixup_depth_write(struct i915_fp_compile *p);
+
+extern void
+i915_translate_program(struct i915_fp_compile *p, const struct tgsi_token *token);
+
+
+
+#endif diff --git a/src/mesa/pipe/i915simple/i915_fpc_debug.c b/src/mesa/pipe/i915simple/i915_fpc_debug.c new file mode 100644 index 00000000000..77deab38bbb --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc_debug.c @@ -0,0 +1,346 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#if 0 +#include +#endif + +#include "i915_reg.h" +#include "i915_fpc.h" + +#if 0 +#include "shader/program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_print.h" +#endif + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(uint type, uint nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + printf("T_DIFFUSE"); + return; + case T_SPECULAR: + printf("T_SPECULAR"); + return; + case T_FOG_W: + printf("T_FOG_W"); + return; + default: + printf("T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + printf("oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + printf("oD"); + return; + } + break; + default: + break; + } + + printf("%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static 
void +print_reg_neg_swizzle(uint reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + printf("."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + printf("-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + printf("x"); + break; + case 1: + printf("y"); + break; + case 2: + printf("z"); + break; + case 3: + printf("w"); + break; + case 4: + printf("0"); + break; + case 5: + printf("1"); + break; + default: + printf("?"); + break; + } + } +} + + +static void +print_src_reg(uint dword) +{ + uint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + uint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + print_reg_neg_swizzle(dword); +} + + +static void +print_dest_reg(uint dword) +{ + uint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + uint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + printf("."); + if (dword & A0_DEST_CHANNEL_X) + printf("x"); + if (dword & A0_DEST_CHANNEL_Y) + printf("y"); + if (dword & A0_DEST_CHANNEL_Z) + printf("z"); + if (dword & A0_DEST_CHANNEL_W) + printf("w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(uint opcode, const uint * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(program[0]); + if (program[0] & A0_DEST_SATURATE) + printf(" = SATURATE "); + else + printf(" = "); + } + + printf("%s ", opcodes[opcode]); + + print_src_reg(GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + printf("\n"); + return; + } + + printf(", "); + print_src_reg(GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + printf("\n"); + return; + } + + printf(", "); + print_src_reg(GET_SRC2_REG(program[2])); + printf("\n"); + return; +} 
+
+
+/* Print one texture instruction: destination, opcode name, sampler
+ * number and the address (coordinate) register.
+ */
+static void
+print_tex_op(uint opcode, const uint * program)
+{
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+   printf(" = ");
+
+   printf("%s ", opcodes[opcode]);
+
+   printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
+
+   print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
+                     REG_TYPE_MASK,
+                     (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
+   printf("\n");
+}
+
+/* Print one declaration: opcode name followed by the declared register.
+ */
+static void
+print_dcl_op(uint opcode, const uint * program)
+{
+   printf("%s ", opcodes[opcode]);
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+   printf("\n");
+}
+
+
+/**
+ * Dump a compiled program to stdout, one instruction per line, between
+ * BEGIN and END markers.
+ *
+ * \param program  packet header dword followed by 3-dword instructions
+ * \param sz       total number of dwords in \p program
+ *
+ * Only complete 3-dword instructions are decoded, so a truncated buffer
+ * is never read past \p sz, and an empty buffer (sz == 0) is not
+ * dereferenced at all.  For a well-formed program (1 + 3 * n dwords)
+ * the output is unchanged.
+ */
+void
+i915_disassemble_program(const uint * program, uint sz)
+{
+   uint i;
+
+   printf("\t\tBEGIN\n");
+
+   if (sz > 0) {
+      /* Length field of the header; the packet length is stored
+       * biased by two.
+       */
+      uint size = program[0] & 0x1ff;
+
+      assert(size + 2 == sz);
+      (void) size;              /* unused when asserts are compiled out */
+
+      program++;
+      for (i = 1; i + 3 <= sz; i += 3, program += 3) {
+         uint opcode = program[0] & (0x1f << 24);
+
+         printf("\t\t");
+
+         if ((int) opcode >= A0_NOP && opcode <= A0_SLT)
+            print_arith_op(opcode >> 24, program);
+         else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+            print_tex_op(opcode >> 24, program);
+         else if (opcode == D0_DCL)
+            print_dcl_op(opcode >> 24, program);
+         else
+            printf("Unknown opcode 0x%x\n", opcode);
+      }
+   }
+
+   printf("\t\tEND\n\n");
+}
+
+
+#if 0
+void i915_print_mesa_instructions( const struct prog_instruction *insn,
+                                   uint nr )
+{
+   uint i;
+   for (i = 0; i < nr; i++, insn++) {
+      printf("%3d: ", i);
+      print_instruction(insn);
+   }
+}
+#endif
diff --git a/src/mesa/pipe/i915simple/i915_fpc_emit.c b/src/mesa/pipe/i915simple/i915_fpc_emit.c
new file mode 100644
index 00000000000..7259bb503db
--- /dev/null
+++ b/src/mesa/pipe/i915simple/i915_fpc_emit.c
@@ -0,0 +1,430 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if 0 +#include +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#endif + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. 
+ */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_R, (bit - 1)); +} + + +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. 
+    */
+   if (nr_const > 1) {
+      uint s[3], first, i, old_utemp_flag;
+
+      s[0] = src0;
+      s[1] = src1;
+      s[2] = src2;
+      old_utemp_flag = p->utemp_flag;
+
+      first = GET_UREG_NR(s[c[0]]);
+      for (i = 1; i < nr_const; i++) {
+         if (GET_UREG_NR(s[c[i]]) != first) {
+            uint tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            s[c[i]], 0, 0);
+            s[c[i]] = tmp;
+         }
+      }
+
+      src0 = s[0];
+      src1 = s[1];
+      src2 = s[2];
+      p->utemp_flag = old_utemp_flag; /* restore */
+   }
+
+   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+   *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+   *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
+
+   p->nr_alu_insn++;
+   return dest;
+}
+
+/**
+ * Emit a texture instruction.
+ *
+ * \param p         compile context
+ * \param dest      destination ureg
+ * \param destmask  A0_DEST_CHANNEL_* write mask
+ * \param sampler   sampler ureg
+ * \param coord     coordinate ureg; must not carry a swizzle or negate
+ * \param op        texture opcode
+ * \return the destination ureg, or 0 after flagging a program error
+ *         when \p coord is swizzled
+ *
+ * A partial write mask is handled by sampling into a utemp and MOVing
+ * the requested channels into \p dest.  Sampling from anything other
+ * than a T register counts as one more texture indirection.
+ */
+uint i915_emit_texld( struct i915_fp_compile *p,
+                      uint dest,
+                      uint destmask,
+                      uint sampler,
+                      uint coord,
+                      uint op )
+{
+   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+      /* No real way to work around this in the general case - need to
+       * allocate and declare a new temporary register (a utemp won't
+       * do).  Will fallback for now.
+       */
+      i915_program_error(p, "Can't (yet) swizzle TEX arguments");
+      return 0;
+   }
+
+   /* Don't worry about saturate as we only support
+    */
+   if (destmask != A0_DEST_CHANNEL_ALL) {
+      uint tmp = i915_get_utemp(p);
+      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+      return dest;
+   }
+   else {
+      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+      /* Strip swizzle/negate from dest, exactly as i915_emit_arith()
+       * does.  The assignment used to live inside the assert(), so it
+       * silently disappeared in builds with asserts compiled out.
+       */
+      dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+      assert(dest);
+
+      if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+         p->nr_tex_indirect++;
+      }
+
+      *(p->csr++) = (op |
+                     T0_DEST( dest ) |
+                     T0_SAMPLER( sampler ));
+
+      *(p->csr++) = T1_ADDRESS_REG( coord );
+      *(p->csr++) = T2_MBZ;
+
+      p->nr_tex_insn++;
+      return dest;
+   }
+}
+
+
+uint
+i915_emit_const1f(struct i915_fp_compile * p, float c0)
+{
+   int reg, idx;
+
+   if (c0 == 0.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+   if (c0 == 1.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
+
+   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
+      if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+         continue;
+      for (idx = 0; idx < 4; idx++) {
+#if 0
+         if (!(p->constant_flags[reg] & (1 << idx)) ||
+             p->fp->constant[reg][idx] == c0) {
+            p->fp->constant[reg][idx] = c0;
+            p->constant_flags[reg] |= 1 << idx;
+            if (reg + 1 > p->fp->nr_constants)
+               p->fp->nr_constants = reg + 1;
+            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+         }
+#else
+         if (!(p->constant_flags[reg] & (1 << idx)) ||
+             p->constants->constant[reg][idx] == c0) {
+            p->constants->constant[reg][idx] = c0;
+            p->constant_flags[reg] |= 1 << idx;
+            if (reg + 1 > p->constants->nr_constants)
+               p->constants->nr_constants = reg + 1;
+            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+         }
+#endif
+      }
+   }
+
+   i915_program_error(p, "i915_emit_const1f: out of constants\n");
+   return 0;
+}
+
+uint
+i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
+{
+   int reg, idx;
+
+   if (c0 == 0.0)
+      return
swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf || + p->constant_flags[reg] == I915_CONSTFLAG_PARAM) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(p->constant_flags[reg] & (3 << idx))) { +#if 0 + p->fp->constant[reg][idx] = c0; + p->fp->constant[reg][idx + 1] = c1; + p->constant_flags[reg] |= 3 << idx; + if (reg + 1 > p->fp->nr_constants) + p->fp->nr_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, + ONE); +#else + p->constants->constant[reg][idx + 0] = c0; + p->constants->constant[reg][idx + 1] = c1; + p->constant_flags[reg] |= 3 << idx; + if (reg + 1 > p->constants->nr_constants) + p->constants->nr_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, + ONE); +#endif + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + int reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (p->constant_flags[reg] == 0xf && +#if 0 + p->fp->constant[reg][0] == c0 && + p->fp->constant[reg][1] == c1 && + p->fp->constant[reg][2] == c2 && + p->fp->constant[reg][3] == c3 +#else + p->constants->constant[reg][0] == c0 && + p->constants->constant[reg][1] == c1 && + p->constants->constant[reg][2] == c2 && + p->constants->constant[reg][3] == c3 +#endif + ) { + return UREG(REG_TYPE_CONST, reg); + } + else if (p->constant_flags[reg] == 0) { +#if 0 + p->fp->constant[reg][0] = c0; + p->fp->constant[reg][1] = c1; + p->fp->constant[reg][2] = c2; + p->fp->constant[reg][3] = c3; +#else + p->constants->constant[reg][0] = c0; + p->constants->constant[reg][1] = 
c1; + p->constants->constant[reg][2] = c2; + p->constants->constant[reg][3] = c3; +#endif + p->constant_flags[reg] = 0xf; +#if 0 + if (reg + 1 > p->fp->nr_constants) + p->fp->nr_constants = reg + 1; +#else + if (reg + 1 > p->constants->nr_constants) + p->constants->nr_constants = reg + 1; +#endif + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} + + +#if 00000/*UNUSED*/ +/* Reserve a slot in the constant file for a Mesa state parameter. + * These will later need to be tracked on statechanges, but that is + * done elsewhere. + */ +uint +i915_emit_param4fv(struct i915_fp_compile * p, const float * values) +{ + struct i915_fragment_program *fp = p->fp; + int i; + + for (i = 0; i < fp->nr_params; i++) { + if (fp->param[i].values == values) + return UREG(REG_TYPE_CONST, fp->param[i].reg); + } + +#if 0 + if (fp->nr_constants == I915_MAX_CONSTANT || + fp->nr_params == I915_MAX_CONSTANT) { +#else + if (p->constants->nr_constants == I915_MAX_CONSTANT || + fp->nr_params == I915_MAX_CONSTANT) { +#endif + i915_program_error(p, "i915_emit_param4fv: out of constants\n"); + return 0; + } + + { +#if 0 + int reg = fp->nr_constants++; +#else + int reg = p->constants->nr_constants++; +#endif + int i = fp->nr_params++; + + assert (p->constant_flags[reg] == 0); + p->constant_flags[reg] = I915_CONSTFLAG_PARAM; + + fp->param[i].values = values; + fp->param[i].reg = reg; + + return UREG(REG_TYPE_CONST, reg); + } +} +#endif diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c new file mode 100644 index 00000000000..a034e734c30 --- /dev/null +++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c @@ -0,0 +1,838 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, 
Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/tgsi/core/tgsi_parse.h" + + +/* Taylor-series coefficients for sine: 1, -1/3!, 1/5!, -1/7! + * Constant operand of the DP4 that finishes the SIN and SCS expansions. + */ +static const float sin_constants[4] = { 1.0, + -1.0 / (3 * 2 * 1), + 1.0 / (5 * 4 * 3 * 2 * 1), + -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* Taylor-series coefficients for cosine: 1, -1/2!, 1/4!, -1/6! + * Constant operand of the DP4 that finishes the COS and SCS expansions. + */ +static const float cos_constants[4] = { 1.0, + -1.0 / (2 * 1), + 1.0 / (4 * 3 * 2 * 1), + -1.0 / (6 * 5 * 4 * 3 * 2 * 1) +}; + + +/** + * Construct a ureg for the given source register and apply its + * swizzle. Negate and absolute-value modifiers are not handled yet; + * the function asserts that none of them is set. + * + * \param p compile context, used for declarations and error reporting + * \param source TGSI source operand to translate + * \return the ureg, or 0 after i915_program_error() for an operand it + * cannot translate
+ */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + const uint index = source->SrcRegister.Index; + uint src; + + switch (source->SrcRegisterInd.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + switch (index) { + case FRAG_ATTRIB_WPOS: + src = i915_emit_decl(p, REG_TYPE_T, p->fp->wpos_tex, D0_CHANNEL_ALL); + break; + case FRAG_ATTRIB_COL0: + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + break; + case FRAG_ATTRIB_COL1: + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + break; + case FRAG_ATTRIB_FOGC: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + src = i915_emit_decl(p, REG_TYPE_T, + T_TEX0 + (index - FRAG_ATTRIB_TEX0), + D0_CHANNEL_ALL); + break; + + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + /* Various parameters and env values. All emitted to + * hardware as program constants. 
+ */ +#if 0 + case PROGRAM_LOCAL_PARAM: + src = i915_emit_param4fv(p, program->Base.LocalParams[index]); + break; + case PROGRAM_ENV_PARAM: + src = i915_emit_param4fv(p, p->env_param[index]); + break; + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + src = i915_emit_param4fv( + p, program->Base.Parameters->ParameterValues[index]); + break; +#else + case TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; +#endif + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + + assert(!source->SrcRegister.Negate); + assert(!source->SrcRegisterExtSwz.NegateX); + assert(!source->SrcRegisterExtSwz.NegateY); + assert(!source->SrcRegisterExtSwz.NegateZ); + assert(!source->SrcRegisterExtSwz.NegateW); + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#if 0 + if (source->SrcRegister.Negate) + negate all + + if (extended source swiz per component) + src = negate(src, + source->SrcRegisterExtSwz.NegateX, + source->SrcRegisterExtSwz.NegateY, + source->SrcRegisterExtSwz.NegateZ, + source->SrcRegisterExtSwz.NegateW); + if (mod.abs) + absolute value + + if (mod.negate) + another negate; +#endif + return src; +} + + +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + switch (dest->DstRegister.Index) { + case FRAG_RESULT_COLR: + return UREG(REG_TYPE_OC, 0); + case FRAG_RESULT_DEPR: + return UREG(REG_TYPE_OD, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index"); + return 0; + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. 
+ */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + /* Saturation is requested only for the TGSI_SAT_ZERO_ONE mode. */ + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + /* Copy each enabled TGSI writemask bit to the matching A0 channel bit. */ + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to D0_SAMPLE_TYPE_x token. + * 1D, 2D and RECT targets all map to D0_SAMPLE_TYPE_2D. An + * unrecognised target flags a program error and returns 0. + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. + * \param opcode the i915 texture opcode, passed through to i915_emit_texld()
+ */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; /* sampler unit is the index of source operand 1 */ + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); /* texture coordinate is source operand 0 */ + + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + * + * The destination and its saturate/writemask flags are taken from + * FullDstRegisters[0] of \p inst. Source slots at or beyond + * \p numArgs are passed to i915_emit_arith() as 0. + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ?
0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + + /* Shorthands for emit_simple_arith() with a fixed source count. They + * expand to a call that uses the identifiers `p` and `inst` of the + * enclosing function. + */ +#define EMIT_1ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 1) +#define EMIT_2ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 2) +#define EMIT_3ARG_ARITH( OP ) emit_simple_arith(p, inst, OP, 3) + + + /** + * Translate one TGSI instruction into i915 instructions. + * Opcodes with a direct A0_ equivalent go through emit_simple_arith(); + * the others are expanded inline, using utemps from i915_get_utemp() + * which are handed back by i915_release_utemps() once the instruction + * has been emitted. An unrecognised opcode flags a program error and + * returns before that release. + */ +static void +i915_translate_instruction(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst) +{ + uint writemask; + uint src0, src1, src2, flags; + uint tmp = 0; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: /* emitted as MAX(src0, -src0) */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_ADD: + EMIT_2ARG_ARITH(A0_ADD); + break; + + case TGSI_OPCODE_CMP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), + 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; + + case TGSI_OPCODE_COS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + /* Range reduction on tmp.x: scale by 1/(2*pi), apply A0_MOD, scale back by 2*pi. */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (M_PI * 2)), 0); + + /* + * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 + * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * result = DP4 t0, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, +
swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, Y, X, ONE), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + /* Dot (1, x^2, x^4, x^6) with the cosine coefficients. */ + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + break; + + case TGSI_OPCODE_DP3: + EMIT_2ARG_ARITH(A0_DP3); + break; + + case TGSI_OPCODE_DP4: + EMIT_2ARG_ARITH(A0_DP4); + break; + + case TGSI_OPCODE_DPH: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + /* Emitted as a DP4 with src0.w replaced by ONE. */ + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case TGSI_OPCODE_DST: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; + + case TGSI_OPCODE_EX2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + /* src0.x is broadcast to all four source channels. */ + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_FLR: + EMIT_1ARG_ARITH(A0_FLR); + break; + + case TGSI_OPCODE_FRC: + EMIT_1ARG_ARITH(A0_FRC); + break; + + case TGSI_OPCODE_KIL: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + /* Emitted through the texture path with the T0_TEXKILL opcode and sampler 0. */ + i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ + 0, src0, T0_TEXKILL); + break; + + case TGSI_OPCODE_LG2: + src0 =
src_vector(p, &inst->FullSrcRegisters[0]); + /* src0.x is broadcast to all four source channels. */ + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_LIT: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (tmp.11-x1, tmp.1x01, tmp.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case TGSI_OPCODE_LRP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* With a = src0, b = src1, c = src2: + * + * b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + * + * The intermediate is written with flags & A0_DEST_CHANNEL_ALL, + * i.e. only the channel-enable bits (presumably dropping the + * saturate bit -- confirm against the A0_ flag definitions). + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; + + case TGSI_OPCODE_MAD: + EMIT_3ARG_ARITH(A0_MAD); + break; + + case TGSI_OPCODE_MAX: + EMIT_2ARG_ARITH(A0_MAX); + break; + + case TGSI_OPCODE_MIN: /* emitted as -MAX(-src0, -src1) */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp =
i915_get_utemp(p); + flags = get_result_flags(inst); + /* tmp = MAX(-src0, -src1); the MOV below negates it again. */ + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case TGSI_OPCODE_MOV: + /* aka TGSI_OPCODE_SWZ */ + EMIT_1ARG_ARITH(A0_MOV); + break; + + case TGSI_OPCODE_MUL: + EMIT_2ARG_ARITH(A0_MUL); + break; + + case TGSI_OPCODE_POW: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + * + * tmp.x = LOG(src0.x) + * tmp.x = tmp.x * src1.x + * result = EXP(tmp.x), broadcast to the enabled channels + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RCP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + /* src0.x is broadcast to all four source channels. */ + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RSQ: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + /* src0.x is broadcast to all four source channels. */ + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_SCS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* + * Sine is written to the Y channel and cosine to the X channel. + * src0 is used directly, without the range reduction that the + * SIN and COS cases perform. + * + * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.y = DP4 t1.wzyx, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.x = DP4 t1.1zyx, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X,
ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_Y) { + uint tmp1; + + /* Keep tmp intact for the cosine path when X is written too. */ + if (writemask & TGSI_WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + /* (x^4, x^3, x^3, x) * (x^3, x^2, 1, 1) = (x^7, x^5, x^3, x). + * The second operand must start with Y (x^3); starting with X + * (x^4) would produce x^8 for the -1/7! term. + */ + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, Y, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (writemask & TGSI_WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; + + case TGSI_OPCODE_SGE: + EMIT_2ARG_ARITH(A0_SGE); + break; + + case TGSI_OPCODE_SIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* Range reduction on tmp.x: scale by 1/(2*pi), apply A0_MOD, + * scale back by 2*pi. + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (M_PI * 2)), 0); + + /* + * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * result = DP4 t1.wzyx, sin_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp,
X, X, ONE, ONE), 0); + + /* (x^4, x^3, x^3, x) * (x^3, x^2, 1, 1) = (x^7, x^5, x^3, x). + * The second operand must start with Y (x^3); starting with X + * (x^4) would produce x^8 for the -1/7! term. + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, Y, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + break; + + case TGSI_OPCODE_SLT: + EMIT_2ARG_ARITH(A0_SLT); + break; + + case TGSI_OPCODE_SUB: + /* Emitted as ADD(src0, -src1). */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_TEX: + emit_tex(p, inst, T0_TEXLD); + break; + + case TGSI_OPCODE_TXB: + emit_tex(p, inst, T0_TEXLDB); + break; + + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + + case TGSI_OPCODE_XPD: + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + * + * tmp holds the subtracted products; the MAD adds -tmp.xyz to + * the other set of products. + */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; + + default: + i915_program_error(p, "bad opcode"); + return; + } + + i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT -- results seem a little different to sw mesa + * LOG -- different to mesa on negative numbers, but this is conformant.
+ * + * Parse failures -- Mesa doesn't currently give a good indication + * internally whether a particular program string parsed or not. This + * can lead to confusion -- hopefully we cope with it ok now. + * + * Only instruction tokens are translated. Declaration and immediate + * tokens are not handled yet and hit assert(0). + * + * \param p compile context passed on to i915_translate_instruction() + * \param tokens TGSI token stream handed to tgsi_parse_init() + */ +void +i915_translate_program(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: /* not implemented */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: /* not implemented */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + + + +/* Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + if (p->shader->outputs_written & (1<