From: Brian Paul Date: Fri, 7 Aug 2015 20:56:51 +0000 (-0600) Subject: svga: add new svga_tgsi_vgpu10.c file X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=35bb29d4994efadd1719a147731afa34e78a0be1;p=mesa.git svga: add new svga_tgsi_vgpu10.c file Signed-off-by: Brian Paul --- diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c new file mode 100644 index 00000000000..60928d7a790 --- /dev/null +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -0,0 +1,6778 @@ +/********************************************************** + * Copyright 1998-2013 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +/** + * @file svga_tgsi_vgpu10.c + * + * TGSI -> VGPU10 shader translation. 
+ * + * \author Mingcheng Chen + * \author Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_two_side.h" +#include "tgsi/tgsi_aa_point.h" +#include "tgsi/tgsi_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_bitmask.h" +#include "util/u_debug.h" +#include "util/u_pstipple.h" + +#include "svga_context.h" +#include "svga_debug.h" +#include "svga_link.h" +#include "svga_shader.h" +#include "svga_tgsi.h" + +#include "VGPU10ShaderTokens.h" + + +#define INVALID_INDEX 99999 +#define MAX_INTERNAL_TEMPS 3 +#define MAX_SYSTEM_VALUES 4 +#define MAX_IMMEDIATE_COUNT \ + (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) +#define MAX_TEMP_ARRAYS 64 /* Enough? */ + + +/** + * Clipping is complicated. There's four different cases which we + * handle during VS/GS shader translation: + */ +enum clipping_mode +{ + CLIP_NONE, /**< No clipping enabled */ + CLIP_LEGACY, /**< The shader has no clipping declarations or code but + * one or more user-defined clip planes are enabled. We + * generate extra code to emit clip distances. + */ + CLIP_DISTANCE, /**< The shader already declares clip distance output + * registers and has code to write to them. + */ + CLIP_VERTEX /**< The shader declares a clip vertex output register and + * has code that writes to the register. We convert the + * clipvertex position into one or more clip distances. + */ +}; + + +struct svga_shader_emitter_v10 +{ + /* The token output buffer */ + unsigned size; + char *buf; + char *ptr; + + /* Information about the shader and state (does not change) */ + struct svga_compile_key key; + struct tgsi_shader_info info; + unsigned unit; + + unsigned inst_start_token; + boolean discard_instruction; /**< throw away current instruction? 
*/ + + union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; + unsigned num_immediates; /**< Number of immediates emitted */ + unsigned common_immediate_pos[8]; /**< literals for common immediates */ + unsigned num_common_immediates; + boolean immediates_emitted; + + unsigned num_outputs; /**< include any extra outputs */ + /** The first extra output is reserved for + * non-adjusted vertex position for + * stream output purpose + */ + + /* Temporary Registers */ + unsigned num_shader_temps; /**< num of temps used by original shader */ + unsigned internal_temp_count; /**< currently allocated internal temps */ + struct { + unsigned start, size; + } temp_arrays[MAX_TEMP_ARRAYS]; + unsigned num_temp_arrays; + + /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ + struct { + unsigned arrayId, index; + } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ + + /** Number of constants used by original shader for each constant buffer. + * The size should probably always match with that of svga_state.constbufs. 
+ */ + unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; + + /* Samplers */ + unsigned num_samplers; + + /* Address regs (really implemented with temps) */ + unsigned num_address_regs; + unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; + + /* Output register usage masks */ + ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; + + /* To map TGSI system value index to VGPU shader input indexes */ + ubyte system_value_indexes[MAX_SYSTEM_VALUES]; + + struct { + /* vertex position scale/translation */ + unsigned out_index; /**< the real position output reg */ + unsigned tmp_index; /**< the fake/temp position output reg */ + unsigned so_index; /**< the non-adjusted position output reg */ + unsigned prescale_scale_index, prescale_trans_index; + boolean need_prescale; + } vposition; + + /* For vertex shaders only */ + struct { + /* viewport constant */ + unsigned viewport_index; + + /* temp index of adjusted vertex attributes */ + unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; + } vs; + + /* For fragment shaders only */ + struct { + /* apha test */ + unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ + unsigned color_tmp_index; /**< fake/temp color output reg */ + unsigned alpha_ref_index; /**< immediate constant for alpha ref */ + + /* front-face */ + unsigned face_input_index; /**< real fragment shader face reg (bool) */ + unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ + + unsigned pstipple_sampler_unit; + + unsigned fragcoord_input_index; /**< real fragment position input reg */ + unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ + } fs; + + /* For geometry shaders only */ + struct { + VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ + VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ + unsigned input_size; /**< size of input arrays */ + unsigned prim_id_index; /**< primitive id register index */ + unsigned max_out_vertices; /**< maximum number of output vertices */ + 
} gs; + + /* For vertex or geometry shaders */ + enum clipping_mode clip_mode; + unsigned clip_dist_out_index; /**< clip distance output register index */ + unsigned clip_dist_tmp_index; /**< clip distance temporary register */ + unsigned clip_dist_so_index; /**< clip distance shadow copy */ + + /** Index of temporary holding the clipvertex coordinate */ + unsigned clip_vertex_out_index; /**< clip vertex output register index */ + unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ + + /* user clip plane constant slot indexes */ + unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; + + boolean uses_flat_interp; + + /* For all shaders: const reg index for RECT coord scaling */ + unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; + + /* For all shaders: const reg index for texture buffer size */ + unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; + + /* VS/GS/FS Linkage info */ + struct shader_linkage linkage; + + bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ +}; + + +static boolean +emit_post_helpers(struct svga_shader_emitter_v10 *emit); + +static boolean +emit_vertex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst); + +static char err_buf[128]; + +static boolean +expand(struct svga_shader_emitter_v10 *emit) +{ + char *new_buf; + unsigned newsize = emit->size * 2; + + if (emit->buf != err_buf) + new_buf = REALLOC(emit->buf, emit->size, newsize); + else + new_buf = NULL; + + if (new_buf == NULL) { + emit->ptr = err_buf; + emit->buf = err_buf; + emit->size = sizeof(err_buf); + return FALSE; + } + + emit->size = newsize; + emit->ptr = new_buf + (emit->ptr - emit->buf); + emit->buf = new_buf; + return TRUE; +} + +/** + * Create and initialize a new svga_shader_emitter_v10 object. 
+ */ +static struct svga_shader_emitter_v10 * +alloc_emitter(void) +{ + struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); + + if (!emit) + return NULL; + + /* to initialize the output buffer */ + emit->size = 512; + if (!expand(emit)) { + FREE(emit); + return NULL; + } + return emit; +} + +/** + * Free an svga_shader_emitter_v10 object. + */ +static void +free_emitter(struct svga_shader_emitter_v10 *emit) +{ + assert(emit); + FREE(emit->buf); /* will be NULL if translation succeeded */ + FREE(emit); +} + +static INLINE boolean +reserve(struct svga_shader_emitter_v10 *emit, + unsigned nr_dwords) +{ + while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { + if (!expand(emit)) + return FALSE; + } + + return TRUE; +} + +static boolean +emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) +{ + if (!reserve(emit, 1)) + return FALSE; + + *(uint32 *)emit->ptr = dword; + emit->ptr += sizeof dword; + return TRUE; +} + +static boolean +emit_dwords(struct svga_shader_emitter_v10 *emit, + const uint32 *dwords, + unsigned nr) +{ + if (!reserve(emit, nr)) + return FALSE; + + memcpy(emit->ptr, dwords, nr * sizeof *dwords); + emit->ptr += nr * sizeof *dwords; + return TRUE; +} + +/** Return the number of tokens in the emitter's buffer */ +static unsigned +emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) +{ + return (emit->ptr - emit->buf) / sizeof(unsigned); +} + + +/** + * Check for register overflow. If we overflow we'll set an + * error flag. This function can be called for register declarations + * or use as src/dst instruction operands. + * \param type register type. 
One of VGPU10_OPERAND_TYPE_x + or VGPU10_OPCODE_DCL_x + * \param index the register index + */ +static void +check_register_index(struct svga_shader_emitter_v10 *emit, + unsigned operandType, unsigned index) +{ + bool overflow_before = emit->register_overflow; + + switch (operandType) { + case VGPU10_OPERAND_TYPE_TEMP: + case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: + case VGPU10_OPCODE_DCL_TEMPS: + if (index >= VGPU10_MAX_TEMPS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: + case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: + if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_INPUT: + case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: + case VGPU10_OPCODE_DCL_INPUT: + case VGPU10_OPCODE_DCL_INPUT_SGV: + case VGPU10_OPCODE_DCL_INPUT_SIV: + case VGPU10_OPCODE_DCL_INPUT_PS: + case VGPU10_OPCODE_DCL_INPUT_PS_SGV: + case VGPU10_OPCODE_DCL_INPUT_PS_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_INPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_INPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_INPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT: + case VGPU10_OPCODE_DCL_OUTPUT_SGV: + case VGPU10_OPCODE_DCL_OUTPUT_SIV: + if ((emit->unit == PIPE_SHADER_VERTEX && + index >= VGPU10_MAX_VS_OUTPUTS) || + (emit->unit == PIPE_SHADER_GEOMETRY && + index >= VGPU10_MAX_GS_OUTPUTS) || + (emit->unit == PIPE_SHADER_FRAGMENT && + index >= VGPU10_MAX_FS_OUTPUTS)) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_SAMPLER: + case VGPU10_OPCODE_DCL_SAMPLER: + if (index >= VGPU10_MAX_SAMPLERS) { + emit->register_overflow = TRUE; + } + break; + case VGPU10_OPERAND_TYPE_RESOURCE: + case VGPU10_OPCODE_DCL_RESOURCE: + if (index >= VGPU10_MAX_RESOURCES) { + emit->register_overflow = TRUE; + } + break; + case 
VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + if (index >= MAX_IMMEDIATE_COUNT) { + emit->register_overflow = TRUE; + } + break; + default: + assert(0); + ; /* nothing */ + } + + if (emit->register_overflow && !overflow_before) { + debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n", + operandType, index); + } +} + + +/** + * Examine misc state to determine the clipping mode. + */ +static void +determine_clipping_mode(struct svga_shader_emitter_v10 *emit) +{ + if (emit->info.num_written_clipdistance > 0) { + emit->clip_mode = CLIP_DISTANCE; + } + else if (emit->info.writes_clipvertex) { + emit->clip_mode = CLIP_VERTEX; + } + else if (emit->key.clip_plane_enable) { + emit->clip_mode = CLIP_LEGACY; + } + else { + emit->clip_mode = CLIP_NONE; + } +} + + +/** + * For clip distance register declarations and clip distance register + * writes we need to mask the declaration usage or instruction writemask + * (respectively) against the set of the really-enabled clipping planes. + * + * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables + * has a VS that writes to all 8 clip distance registers, but the plane enable + * flags are a subset of that. + * + * This function is used to apply the plane enable flags to the register + * declaration or instruction writemask. + * + * \param writemask the declaration usage mask or instruction writemask + * \param clip_reg_index which clip plane register is being declared/written. + * The legal values are 0 and 1 (two clip planes per + * register, for a total of 8 clip planes) + */ +static unsigned +apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, + unsigned writemask, unsigned clip_reg_index) +{ + unsigned shift; + + assert(clip_reg_index < 2); + + /* four clip planes per clip register: */ + shift = clip_reg_index * 4; + writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); + + return writemask; +} + + +/** + * Translate gallium shader type into VGPU10 type. 
+ */ +static VGPU10_PROGRAM_TYPE +translate_shader_type(unsigned type) +{ + switch (type) { + case PIPE_SHADER_VERTEX: + return VGPU10_VERTEX_SHADER; + case PIPE_SHADER_GEOMETRY: + return VGPU10_GEOMETRY_SHADER; + case PIPE_SHADER_FRAGMENT: + return VGPU10_PIXEL_SHADER; + default: + assert(!"Unexpected shader type"); + return VGPU10_VERTEX_SHADER; + } +} + + +/** + * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x + * Note: we only need to translate the opcodes for "simple" instructions, + * as seen below. All other opcodes are handled/translated specially. + */ +static VGPU10_OPCODE_TYPE +translate_opcode(unsigned opcode) +{ + switch (opcode) { + case TGSI_OPCODE_MOV: + return VGPU10_OPCODE_MOV; + case TGSI_OPCODE_MUL: + return VGPU10_OPCODE_MUL; + case TGSI_OPCODE_ADD: + return VGPU10_OPCODE_ADD; + case TGSI_OPCODE_DP3: + return VGPU10_OPCODE_DP3; + case TGSI_OPCODE_DP4: + return VGPU10_OPCODE_DP4; + case TGSI_OPCODE_MIN: + return VGPU10_OPCODE_MIN; + case TGSI_OPCODE_MAX: + return VGPU10_OPCODE_MAX; + case TGSI_OPCODE_MAD: + return VGPU10_OPCODE_MAD; + case TGSI_OPCODE_SQRT: + return VGPU10_OPCODE_SQRT; + case TGSI_OPCODE_FRC: + return VGPU10_OPCODE_FRC; + case TGSI_OPCODE_FLR: + return VGPU10_OPCODE_ROUND_NI; + case TGSI_OPCODE_FSEQ: + return VGPU10_OPCODE_EQ; + case TGSI_OPCODE_FSGE: + return VGPU10_OPCODE_GE; + case TGSI_OPCODE_FSNE: + return VGPU10_OPCODE_NE; + case TGSI_OPCODE_DDX: + return VGPU10_OPCODE_DERIV_RTX; + case TGSI_OPCODE_DDY: + return VGPU10_OPCODE_DERIV_RTY; + case TGSI_OPCODE_RET: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_DIV: + return VGPU10_OPCODE_DIV; + case TGSI_OPCODE_IDIV: + return VGPU10_OPCODE_IDIV; + case TGSI_OPCODE_DP2: + return VGPU10_OPCODE_DP2; + case TGSI_OPCODE_BRK: + return VGPU10_OPCODE_BREAK; + case TGSI_OPCODE_IF: + return VGPU10_OPCODE_IF; + case TGSI_OPCODE_ELSE: + return VGPU10_OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: + return VGPU10_OPCODE_ENDIF; + case TGSI_OPCODE_CEIL: + return VGPU10_OPCODE_ROUND_PI; + case 
TGSI_OPCODE_I2F: + return VGPU10_OPCODE_ITOF; + case TGSI_OPCODE_NOT: + return VGPU10_OPCODE_NOT; + case TGSI_OPCODE_TRUNC: + return VGPU10_OPCODE_ROUND_Z; + case TGSI_OPCODE_SHL: + return VGPU10_OPCODE_ISHL; + case TGSI_OPCODE_AND: + return VGPU10_OPCODE_AND; + case TGSI_OPCODE_OR: + return VGPU10_OPCODE_OR; + case TGSI_OPCODE_XOR: + return VGPU10_OPCODE_XOR; + case TGSI_OPCODE_CONT: + return VGPU10_OPCODE_CONTINUE; + case TGSI_OPCODE_EMIT: + return VGPU10_OPCODE_EMIT; + case TGSI_OPCODE_ENDPRIM: + return VGPU10_OPCODE_CUT; + case TGSI_OPCODE_BGNLOOP: + return VGPU10_OPCODE_LOOP; + case TGSI_OPCODE_ENDLOOP: + return VGPU10_OPCODE_ENDLOOP; + case TGSI_OPCODE_ENDSUB: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_NOP: + return VGPU10_OPCODE_NOP; + case TGSI_OPCODE_BREAKC: + return VGPU10_OPCODE_BREAKC; + case TGSI_OPCODE_END: + return VGPU10_OPCODE_RET; + case TGSI_OPCODE_F2I: + return VGPU10_OPCODE_FTOI; + case TGSI_OPCODE_IMAX: + return VGPU10_OPCODE_IMAX; + case TGSI_OPCODE_IMIN: + return VGPU10_OPCODE_IMIN; + case TGSI_OPCODE_UDIV: + case TGSI_OPCODE_UMOD: + case TGSI_OPCODE_MOD: + return VGPU10_OPCODE_UDIV; + case TGSI_OPCODE_IMUL_HI: + return VGPU10_OPCODE_IMUL; + case TGSI_OPCODE_INEG: + return VGPU10_OPCODE_INEG; + case TGSI_OPCODE_ISHR: + return VGPU10_OPCODE_ISHR; + case TGSI_OPCODE_ISGE: + return VGPU10_OPCODE_IGE; + case TGSI_OPCODE_ISLT: + return VGPU10_OPCODE_ILT; + case TGSI_OPCODE_F2U: + return VGPU10_OPCODE_FTOU; + case TGSI_OPCODE_UADD: + return VGPU10_OPCODE_IADD; + case TGSI_OPCODE_U2F: + return VGPU10_OPCODE_UTOF; + case TGSI_OPCODE_UCMP: + return VGPU10_OPCODE_MOVC; + case TGSI_OPCODE_UMAD: + return VGPU10_OPCODE_UMAD; + case TGSI_OPCODE_UMAX: + return VGPU10_OPCODE_UMAX; + case TGSI_OPCODE_UMIN: + return VGPU10_OPCODE_UMIN; + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_UMUL_HI: + return VGPU10_OPCODE_UMUL; + case TGSI_OPCODE_USEQ: + return VGPU10_OPCODE_IEQ; + case TGSI_OPCODE_USGE: + return VGPU10_OPCODE_UGE; + case TGSI_OPCODE_USHR: + return 
VGPU10_OPCODE_USHR; + case TGSI_OPCODE_USLT: + return VGPU10_OPCODE_ULT; + case TGSI_OPCODE_USNE: + return VGPU10_OPCODE_INE; + case TGSI_OPCODE_SWITCH: + return VGPU10_OPCODE_SWITCH; + case TGSI_OPCODE_CASE: + return VGPU10_OPCODE_CASE; + case TGSI_OPCODE_DEFAULT: + return VGPU10_OPCODE_DEFAULT; + case TGSI_OPCODE_ENDSWITCH: + return VGPU10_OPCODE_ENDSWITCH; + case TGSI_OPCODE_FSLT: + return VGPU10_OPCODE_LT; + case TGSI_OPCODE_ROUND: + return VGPU10_OPCODE_ROUND_NE; + default: + assert(!"Unexpected TGSI opcode in translate_opcode()"); + return VGPU10_OPCODE_NOP; + } +} + + +/** + * Translate a TGSI register file type into a VGPU10 operand type. + * \param array is the TGSI_FILE_TEMPORARY register an array? + */ +static VGPU10_OPERAND_TYPE +translate_register_file(enum tgsi_file_type file, boolean array) +{ + switch (file) { + case TGSI_FILE_CONSTANT: + return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; + case TGSI_FILE_INPUT: + return VGPU10_OPERAND_TYPE_INPUT; + case TGSI_FILE_OUTPUT: + return VGPU10_OPERAND_TYPE_OUTPUT; + case TGSI_FILE_TEMPORARY: + return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP + : VGPU10_OPERAND_TYPE_TEMP; + case TGSI_FILE_IMMEDIATE: + /* all immediates are 32-bit values at this time so + * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 
+ */ + return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; + case TGSI_FILE_SAMPLER: + return VGPU10_OPERAND_TYPE_SAMPLER; + case TGSI_FILE_SYSTEM_VALUE: + return VGPU10_OPERAND_TYPE_INPUT; + + /* XXX TODO more cases to finish */ + + default: + assert(!"Bad tgsi register file!"); + return VGPU10_OPERAND_TYPE_NULL; + } +} + + +/** + * Emit a null dst register + */ +static void +emit_null_dst_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand; + + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_NULL; + operand.numComponents = VGPU10_OPERAND_0_COMPONENT; + + emit_dword(emit, operand.value); +} + + +/** + * If the given register is a temporary, return the array ID. + * Else return zero. + */ +static unsigned +get_temp_array_id(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].arrayId; + } + else { + return 0; + } +} + + +/** + * If the given register is a temporary, convert the index from a TGSI + * TEMPORARY index to a VGPU10 temp index. + */ +static unsigned +remap_temp_index(const struct svga_shader_emitter_v10 *emit, + unsigned file, unsigned index) +{ + if (file == TGSI_FILE_TEMPORARY) { + return emit->temp_map[index].index; + } + else { + return index; + } +} + + +/** + * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). + * Note: the operandType field must already be initialized. 
+ */ +static VGPU10OperandToken0 +setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, + VGPU10OperandToken0 operand0, + unsigned file, + boolean indirect, boolean index2D, + unsigned tempArrayID) +{ + unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D; + + /* + * Compute index dimensions + */ + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || + operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + indexDim = VGPU10_OPERAND_INDEX_0D; + assert(operand0.selectionMode == 0); + } + else { + if (index2D || + tempArrayID > 0 || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + indexDim = VGPU10_OPERAND_INDEX_2D; + } + else { + indexDim = VGPU10_OPERAND_INDEX_1D; + } + } + + /* + * Compute index representations (immediate, relative, etc). + */ + if (tempArrayID > 0) { + assert(file == TGSI_FILE_TEMPORARY); + /* First index is the array ID, second index is the array element */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (indirect) { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + } + else if (indirect) { + if (file == TGSI_FILE_CONSTANT) { + /* index[0] indicates which constant buffer while index[1] indicates + * the position in the constant buffer. + */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + else { + /* All other register files are 1-dimensional */ + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; + } + } + else { + index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; + } + + operand0.indexDimension = indexDim; + operand0.index0Representation = index0Rep; + operand0.index1Representation = index1Rep; + + return operand0; +} + + +/** + * Emit the operand for expressing an address register for indirect indexing. 
+ * Note that the address register is really just a temp register. + * \param addr_reg_index which address register to use + */ +static void +emit_indirect_register(struct svga_shader_emitter_v10 *emit, + unsigned addr_reg_index) +{ + unsigned tmp_reg_index; + VGPU10OperandToken0 operand0; + + assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); + + tmp_reg_index = emit->address_reg_index[addr_reg_index]; + + /* operand0 is a simple temporary register, selecting one component */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.swizzleX = 0; + operand0.swizzleY = 1; + operand0.swizzleZ = 2; + operand0.swizzleW = 3; + + emit_dword(emit, operand0.value); + emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); +} + + +/** + * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
+ * \param emit the emitter context + * \param reg the TGSI dst register to translate + */ +static void +emit_dst_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned sem_name = emit->info.output_semantic_name[index]; + const unsigned sem_index = emit->info.output_semantic_index[index]; + unsigned writemask = reg->Register.WriteMask; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + VGPU10OperandToken0 operand0; + + if (file == TGSI_FILE_OUTPUT) { + if (emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY) { + if (index == emit->vposition.out_index && + emit->vposition.tmp_index != INVALID_INDEX) { + /* replace OUTPUT[POS] with TEMP[POS]. We need to store the + * vertex position result in a temporary so that we can modify + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->vposition.tmp_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPDIST && + emit->clip_dist_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. + * We store the clip distance in a temporary first, then + * we'll copy it to the shadow copy and to CLIPDIST with the + * enabled planes mask in emit_clip_distance_instructions(). 
+ */ + file = TGSI_FILE_TEMPORARY; + index = emit->clip_dist_tmp_index + sem_index; + } + else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && + emit->clip_vertex_tmp_index != INVALID_INDEX) { + /* replace the CLIPVERTEX output register with a temporary */ + assert(emit->clip_mode == CLIP_VERTEX); + assert(sem_index == 0); + file = TGSI_FILE_TEMPORARY; + index = emit->clip_vertex_tmp_index; + } + } + else if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (sem_name == TGSI_SEMANTIC_POSITION) { + /* Fragment depth output register */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + emit_dword(emit, operand0.value); + return; + } + else if (index == emit->fs.color_out_index[0] && + emit->fs.color_tmp_index != INVALID_INDEX) { + /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the + * fragment color result in a temporary so that we can read it + * it in the post_helper() code. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.color_tmp_index; + } + else { + /* Typically, for fragment shaders, the output register index + * matches the color semantic index. But not when we write to + * the fragment depth register. In that case, OUT[0] will be + * fragdepth and OUT[1] will be the 0th color output. We need + * to use the semantic index for color outputs. + */ + assert(sem_name == TGSI_SEMANTIC_COLOR); + index = emit->info.output_semantic_index[index]; + } + } + } + + /* init operand tokens to all zero */ + operand0.value = 0; + + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + /* the operand has a writemask */ + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + + /* Which of the four dest components to write to. Note that we can use a + * simple assignment here since TGSI writemasks match VGPU10 writemasks. 
+ */ + STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); + operand0.mask = writemask; + + /* translate TGSI register file type to VGPU10 operand type */ + operand0.operandType = translate_register_file(file, tempArrayId > 0); + + check_register_index(emit, operand0.operandType, index); + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + /* Emit tokens */ + emit_dword(emit, operand0.value); + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } +} + + +/** + * Translate a src register of a TGSI instruction and emit VGPU10 tokens. + */ +static void +emit_src_register(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_src_register *reg) +{ + unsigned file = reg->Register.File; + unsigned index = reg->Register.Index; + const unsigned indirect = reg->Register.Indirect; + const unsigned tempArrayId = get_temp_array_id(emit, file, index); + const unsigned index2d = reg->Register.Dimension; + const unsigned swizzleX = reg->Register.SwizzleX; + const unsigned swizzleY = reg->Register.SwizzleY; + const unsigned swizzleZ = reg->Register.SwizzleZ; + const unsigned swizzleW = reg->Register.SwizzleW; + const unsigned absolute = reg->Register.Absolute; + const unsigned negate = reg->Register.Negate; + bool is_prim_id = FALSE; + + VGPU10OperandToken0 operand0; + VGPU10OperandToken1 operand1; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + file == TGSI_FILE_INPUT) { + if (index == emit->fs.face_input_index) { + /* Replace INPUT[FACE] with TEMP[FACE] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.face_tmp_index; + } + else if (index == emit->fs.fragcoord_input_index) { + /* Replace INPUT[POSITION] with TEMP[POSITION] */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.fragcoord_tmp_index; + } + else { + /* We remap fragment shader inputs to that FS input 
indexes + * match up with VS/GS output indexes. + */ + index = emit->linkage.input_map[index]; + } + } + else if (emit->unit == PIPE_SHADER_GEOMETRY && + file == TGSI_FILE_INPUT) { + is_prim_id = (index == emit->gs.prim_id_index); + index = emit->linkage.input_map[index]; + } + else if (emit->unit == PIPE_SHADER_VERTEX) { + if (file == TGSI_FILE_INPUT) { + /* if input is adjusted... */ + if ((emit->key.vs.adjust_attrib_w_1 | + emit->key.vs.adjust_attrib_itof | + emit->key.vs.adjust_attrib_utof | + emit->key.vs.attrib_is_bgra | + emit->key.vs.attrib_puint_to_snorm | + emit->key.vs.attrib_puint_to_uscaled | + emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { + file = TGSI_FILE_TEMPORARY; + index = emit->vs.adjusted_input[index]; + } + } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + assert(index < Elements(emit->system_value_indexes)); + index = emit->system_value_indexes[index]; + } + } + + operand0.value = operand1.value = 0; + + if (is_prim_id) { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; + } + else { + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = translate_register_file(file, tempArrayId > 0); + } + + operand0 = setup_operand0_indexing(emit, operand0, file, indirect, + index2d, tempArrayId); + + if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && + operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { + /* there's no swizzle for in-line immediates */ + if (swizzleX == swizzleY && + swizzleX == swizzleZ && + swizzleX == swizzleW) { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + } + else { + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + } + + operand0.swizzleX = swizzleX; + operand0.swizzleY = swizzleY; + operand0.swizzleZ = swizzleZ; + operand0.swizzleW = swizzleW; + + if (absolute || negate) { + operand0.extended = 1; + operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; 
+ if (absolute && !negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; + if (!absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; + if (absolute && negate) + operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; + } + } + + /* Emit the operand tokens */ + emit_dword(emit, operand0.value); + if (operand0.extended) + emit_dword(emit, operand1.value); + + if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { + /* Emit the four float/int in-line immediate values */ + unsigned *c; + assert(index < Elements(emit->immediates)); + assert(file == TGSI_FILE_IMMEDIATE); + assert(swizzleX < 4); + assert(swizzleY < 4); + assert(swizzleZ < 4); + assert(swizzleW < 4); + c = (unsigned *) emit->immediates[index]; + emit_dword(emit, c[swizzleX]); + emit_dword(emit, c[swizzleY]); + emit_dword(emit, c[swizzleZ]); + emit_dword(emit, c[swizzleW]); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { + /* Emit the register index(es) */ + if (index2d || + operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { + emit_dword(emit, reg->Dimension.Index); + } + + if (tempArrayId > 0) { + emit_dword(emit, tempArrayId); + } + + emit_dword(emit, remap_temp_index(emit, file, index)); + + if (indirect) { + emit_indirect_register(emit, reg->Indirect.Index); + } + } +} + + +/** + * Emit a resource operand (for use with a SAMPLE instruction). 
+ */ +static void +emit_resource_register(struct svga_shader_emitter_v10 *emit, + unsigned resource_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + + emit_dword(emit, operand0.value); + emit_dword(emit, resource_number); +} + + +/** + * Emit a sampler operand (for use with a SAMPLE instruction). + */ +static void +emit_sampler_register(struct svga_shader_emitter_v10 *emit, + unsigned sampler_number) +{ + VGPU10OperandToken0 operand0; + + check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + + emit_dword(emit, operand0.value); + emit_dword(emit, sampler_number); +} + + +/** + * Emit an operand which reads the IS_FRONT_FACING register. 
+ */ +static void +emit_face_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand0; + unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; + + /* init */ + operand0.value = 0; + + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_X; + operand0.swizzleZ = VGPU10_COMPONENT_X; + operand0.swizzleW = VGPU10_COMPONENT_X; + + emit_dword(emit, operand0.value); + emit_dword(emit, index); +} + + +/** + * Emit the token for a VGPU10 opcode. + * \param saturate clamp result to [0,1]? + */ +static void +emit_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + emit_dword(emit, token0.value); +} + + +/** + * Emit the token for a VGPU10 resinfo instruction. + * \param modifier return type modifier, _uint or _rcpFloat. + * TODO: We may want to remove this parameter if it will + * only ever be used as _uint. + */ +static void +emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, + VGPU10_RESINFO_RETURN_TYPE modifier) +{ + VGPU10OpcodeToken0 token0; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = VGPU10_OPCODE_RESINFO; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.resinfoReturnType = modifier; + + emit_dword(emit, token0.value); +} + + +/** + * Emit opcode tokens for a texture sample instruction. Texture instructions + * can be rather complicated (texel offsets, etc) so we have this specialized + * function. 
+ */ +static void +emit_sample_opcode(struct svga_shader_emitter_v10 *emit, + unsigned vgpu10_opcode, boolean saturate, + const int offsets[3]) +{ + VGPU10OpcodeToken0 token0; + VGPU10OpcodeToken1 token1; + + token0.value = 0; /* init all fields to zero */ + token0.opcodeType = vgpu10_opcode; + token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ + token0.saturate = saturate; + + if (offsets[0] || offsets[1] || offsets[2]) { + assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); + assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); + + token0.extended = 1; + token1.value = 0; + token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; + token1.offsetU = offsets[0]; + token1.offsetV = offsets[1]; + token1.offsetW = offsets[2]; + } + + emit_dword(emit, token0.value); + if (token0.extended) { + emit_dword(emit, token1.value); + } +} + + +/** + * Emit a DISCARD opcode token. + * If nonzero is set, we'll discard the fragment if the X component is not 0. + * Otherwise, we'll discard the fragment if the X component is 0. + */ +static void +emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) +{ + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DISCARD; + if (nonzero) + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + emit_dword(emit, opcode0.value); +} + + +/** + * We need to call this before we begin emitting a VGPU10 instruction. + */ +static void +begin_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->inst_start_token == 0); + /* Save location of the instruction's VGPU10OpcodeToken0 token. + * Note, we can't save a pointer because it would become invalid if + * we have to realloc the output buffer. 
+ */ + emit->inst_start_token = emit_get_num_tokens(emit); +} + + +/** + * We need to call this after we emit the last token of a VGPU10 instruction. + * This function patches in the opcode token's instructionLength field. + */ +static void +end_emit_instruction(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; + unsigned inst_length; + + assert(emit->inst_start_token > 0); + + if (emit->discard_instruction) { + /* Back up the emit->ptr to where this instruction started so + * that we discard the current instruction. + */ + emit->ptr = (char *) (tokens + emit->inst_start_token); + } + else { + /* Compute instruction length and patch that into the start of + * the instruction. + */ + inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; + + assert(inst_length > 0); + + tokens[emit->inst_start_token].instructionLength = inst_length; + } + + emit->inst_start_token = 0; /* reset to zero for error checking */ + emit->discard_instruction = FALSE; +} + + +/** + * Return index for a free temporary register. + */ +static unsigned +get_temp_index(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); + return emit->num_shader_temps + emit->internal_temp_count++; +} + + +/** + * Release the temporaries which were generated by get_temp_index(). + */ +static void +free_temp_indexes(struct svga_shader_emitter_v10 *emit) +{ + emit->internal_temp_count = 0; +} + + +/** + * Create a tgsi_full_src_register. + */ +static struct tgsi_full_src_register +make_src_reg(unsigned file, unsigned index) +{ + struct tgsi_full_src_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.SwizzleX = TGSI_SWIZZLE_X; + reg.Register.SwizzleY = TGSI_SWIZZLE_Y; + reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; + reg.Register.SwizzleW = TGSI_SWIZZLE_W; + return reg; +} + + +/** + * Create a tgsi_full_src_register for a temporary. 
+ */ +static struct tgsi_full_src_register +make_src_temp_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_src_register for a constant. + */ +static struct tgsi_full_src_register +make_src_const_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_CONSTANT, index); +} + + +/** + * Create a tgsi_full_src_register for an immediate constant. + */ +static struct tgsi_full_src_register +make_src_immediate_reg(unsigned index) +{ + return make_src_reg(TGSI_FILE_IMMEDIATE, index); +} + + +/** + * Create a tgsi_full_dst_register. + */ +static struct tgsi_full_dst_register +make_dst_reg(unsigned file, unsigned index) +{ + struct tgsi_full_dst_register reg; + + memset(®, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; + return reg; +} + + +/** + * Create a tgsi_full_dst_register for a temporary. + */ +static struct tgsi_full_dst_register +make_dst_temp_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_TEMPORARY, index); +} + + +/** + * Create a tgsi_full_dst_register for an output. + */ +static struct tgsi_full_dst_register +make_dst_output_reg(unsigned index) +{ + return make_dst_reg(TGSI_FILE_OUTPUT, index); +} + + +/** + * Create negated tgsi_full_src_register. + */ +static struct tgsi_full_src_register +negate_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register neg = *reg; + neg.Register.Negate = !reg->Register.Negate; + return neg; +} + +/** + * Create absolute value of a tgsi_full_src_register. 
+ */ +static struct tgsi_full_src_register +absolute_src(const struct tgsi_full_src_register *reg) +{ + struct tgsi_full_src_register absolute = *reg; + absolute.Register.Absolute = 1; + return absolute; +} + + +/** Return the named swizzle term from the src register */ +static INLINE unsigned +get_swizzle(const struct tgsi_full_src_register *reg, unsigned term) +{ + switch (term) { + case TGSI_SWIZZLE_X: + return reg->Register.SwizzleX; + case TGSI_SWIZZLE_Y: + return reg->Register.SwizzleY; + case TGSI_SWIZZLE_Z: + return reg->Register.SwizzleZ; + case TGSI_SWIZZLE_W: + return reg->Register.SwizzleW; + default: + assert(!"Bad swizzle"); + return TGSI_SWIZZLE_X; + } +} + + +/** + * Create swizzled tgsi_full_src_register. + */ +static struct tgsi_full_src_register +swizzle_src(const struct tgsi_full_src_register *reg, + unsigned swizzleX, unsigned swizzleY, + unsigned swizzleZ, unsigned swizzleW) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); + swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); + swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); + swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); + return swizzled; +} + + +/** + * Create swizzled tgsi_full_src_register where all the swizzle + * terms are the same. + */ +static struct tgsi_full_src_register +scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle) +{ + struct tgsi_full_src_register swizzled = *reg; + /* Note: we swizzle the current swizzle */ + swizzled.Register.SwizzleX = + swizzled.Register.SwizzleY = + swizzled.Register.SwizzleZ = + swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); + return swizzled; +} + + +/** + * Create new tgsi_full_dst_register with writemask. 
+ * \param mask bitmask of TGSI_WRITEMASK_[XYZW] + */ +static struct tgsi_full_dst_register +writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) +{ + struct tgsi_full_dst_register masked = *reg; + masked.Register.WriteMask = mask; + return masked; +} + + +/** + * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. + */ +static boolean +same_swizzle_terms(const struct tgsi_full_src_register *reg) +{ + return (reg->Register.SwizzleX == reg->Register.SwizzleY && + reg->Register.SwizzleY == reg->Register.SwizzleZ && + reg->Register.SwizzleZ == reg->Register.SwizzleW); +} + + +/** + * Search the vector for the value 'x' and return its position. + */ +static int +find_imm_in_vec4(const union tgsi_immediate_data vec[4], + union tgsi_immediate_data x) +{ + unsigned i; + for (i = 0; i < 4; i++) { + if (vec[i].Int == x.Int) + return i; + } + return -1; +} + + +/** + * Helper used by make_immediate_reg(), make_immediate_reg_4(). + */ +static int +find_immediate(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data x, unsigned startIndex) +{ + const unsigned endIndex = emit->num_immediates; + unsigned i; + + assert(emit->immediates_emitted); + + /* Search immediates for x, y, z, w */ + for (i = startIndex; i < endIndex; i++) { + if (x.Int == emit->immediates[i][0].Int || + x.Int == emit->immediates[i][1].Int || + x.Int == emit->immediates[i][2].Int || + x.Int == emit->immediates[i][3].Int) { + return i; + } + } + /* Should never try to use an immediate value that wasn't pre-declared */ + assert(!"find_immediate() failed!"); + return -1; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data[4] value. + * Note: the values must have been previously declared/allocated in + * emit_pre_helpers(). And, all of x,y,z,w must be located in the same + * vec4 immediate. 
+ */ +static struct tgsi_full_src_register +make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + struct tgsi_full_src_register reg; + unsigned i; + + for (i = 0; i < emit->num_common_immediates; i++) { + /* search for first component value */ + int immpos = find_immediate(emit, imm[0], i); + int x, y, z, w; + + assert(immpos >= 0); + + /* find remaining components within the immediate vector */ + x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); + y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); + z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); + w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); + + if (x >=0 && y >= 0 && z >= 0 && w >= 0) { + /* found them all */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = x; + reg.Register.SwizzleY = y; + reg.Register.SwizzleZ = z; + reg.Register.SwizzleW = w; + return reg; + } + /* else, keep searching */ + } + + assert(!"Failed to find immediate register!"); + + /* Just return IMM[0].xxxx */ + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal + * union tgsi_immediate_data value of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. 
+ */ +static struct tgsi_full_src_register +make_immediate_reg(struct svga_shader_emitter_v10 *emit, + union tgsi_immediate_data value) +{ + struct tgsi_full_src_register reg; + int immpos = find_immediate(emit, value, 0); + + assert(immpos >= 0); + + memset(®, 0, sizeof(reg)); + reg.Register.File = TGSI_FILE_IMMEDIATE; + reg.Register.Index = immpos; + reg.Register.SwizzleX = + reg.Register.SwizzleY = + reg.Register.SwizzleZ = + reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); + + return reg; +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float[4] value. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal float value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. + */ +static struct tgsi_full_src_register +make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) +{ + union tgsi_immediate_data imm; + imm.Float = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int[4] vector. + */ +static struct tgsi_full_src_register +make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return make_immediate_reg_4(emit, imm); +} + + +/** + * Return a tgsi_full_src_register for an immediate/literal int value + * of the form {value, value, value, value}. + * \sa make_immediate_reg_4() regarding allowed values. 
+ */ +static struct tgsi_full_src_register +make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) +{ + union tgsi_immediate_data imm; + imm.Int = value; + return make_immediate_reg(emit, imm); +} + + +/** + * Allocate space for a union tgsi_immediate_data[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_4(struct svga_shader_emitter_v10 *emit, + const union tgsi_immediate_data imm[4]) +{ + unsigned n = emit->num_immediates++; + assert(!emit->immediates_emitted); + assert(n < Elements(emit->immediates)); + emit->immediates[n][0] = imm[0]; + emit->immediates[n][1] = imm[1]; + emit->immediates[n][2] = imm[2]; + emit->immediates[n][3] = imm[3]; + return n; +} + + +/** + * Allocate space for a float[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, + float x, float y, float z, float w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Float = x; + imm[1].Float = y; + imm[2].Float = z; + imm[3].Float = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate space for a int[4] immediate. + * \return the index/position of the immediate. + */ +static unsigned +alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, + int x, int y, int z, int w) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = y; + imm[2].Int = z; + imm[3].Int = w; + return alloc_immediate_4(emit, imm); +} + + +/** + * Allocate a shader input to store a system value. + */ +static unsigned +alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) +{ + const unsigned n = emit->info.num_inputs + index; + assert(index < Elements(emit->system_value_indexes)); + emit->system_value_indexes[index] = n; + return n; +} + + +/** + * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 
+ */ +static boolean +emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_immediate *imm) +{ + /* We don't actually emit any code here. We just save the + * immediate values and emit them later. + */ + alloc_immediate_4(emit, imm->u); + return TRUE; +} + + +/** + * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block + * containing all the immediate values previously allocated + * with alloc_immediate_4(). + */ +static boolean +emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 token; + + assert(!emit->immediates_emitted); + + token.value = 0; + token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; + token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; + + /* Note: no begin/end_emit_instruction() calls */ + emit_dword(emit, token.value); + emit_dword(emit, 2 + 4 * emit->num_immediates); + emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); + + emit->immediates_emitted = TRUE; + + return TRUE; +} + + +/** + * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 + * interpolation mode. + * \return a VGPU10_INTERPOLATION_x value + */ +static unsigned +translate_interpolation(const struct svga_shader_emitter_v10 *emit, + unsigned interp, unsigned interpolate_loc) +{ + if (interp == TGSI_INTERPOLATE_COLOR) { + interp = emit->key.fs.flatshade ? + TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; + } + + switch (interp) { + case TGSI_INTERPOLATE_CONSTANT: + return VGPU10_INTERPOLATION_CONSTANT; + case TGSI_INTERPOLATE_LINEAR: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : + VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; + case TGSI_INTERPOLATE_PERSPECTIVE: + return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 
+ VGPU10_INTERPOLATION_LINEAR_CENTROID : + VGPU10_INTERPOLATION_LINEAR; + default: + assert(!"Unexpected interpolation mode"); + return VGPU10_INTERPOLATION_CONSTANT; + } +} + + +/** + * Translate a TGSI property to VGPU10. + * Don't emit any instructions yet, only need to gather the primitive property information. + * The output primitive topology might be changed later. The final property instructions + * will be emitted as part of the pre-helper code. + */ +static boolean +emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_property *prop) +{ + static const VGPU10_PRIMITIVE primType[] = { + VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ + 
VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + static const unsigned inputArraySize[] = { + 0, /* VGPU10_PRIMITIVE_UNDEFINED */ + 1, /* VGPU10_PRIMITIVE_POINT */ + 2, /* VGPU10_PRIMITIVE_LINE */ + 3, /* VGPU10_PRIMITIVE_TRIANGLE */ + 0, + 0, + 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ + 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ + }; + + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_GS_INPUT_PRIM: + assert(prop->u[0].Data < Elements(primType)); + emit->gs.prim_type = primType[prop->u[0].Data]; + assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); + emit->gs.input_size = inputArraySize[emit->gs.prim_type]; + break; + + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + assert(prop->u[0].Data < Elements(primTopology)); + emit->gs.prim_topology = primTopology[prop->u[0].Data]; + assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); + break; + + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + emit->gs.max_out_vertices = prop->u[0].Data; + break; + + default: + break; + } + + return TRUE; +} + + +static void +emit_property_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, unsigned nData, + unsigned data) +{ + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + if (nData) + emit_dword(emit, data); + end_emit_instruction(emit); +} + + +/** + * Emit property instructions + */ +static void +emit_property_instructions(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* emit input 
primitive type declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; + opcode0.primitive = emit->gs.prim_type; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit output primitive topology declaration */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; + opcode0.primitiveTopology = emit->gs.prim_topology; + emit_property_instruction(emit, opcode0, 0, 0); + + /* emit max output vertices */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; + emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); +} + + +/** + * Emit a vgpu10 declaration "instruction". + * \param index the register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. + */ +static void +emit_decl_instruction(struct svga_shader_emitter_v10 *emit, + VGPU10OpcodeToken0 opcode0, + VGPU10OperandToken0 operand0, + VGPU10NameToken name_token, + unsigned index, unsigned size) +{ + assert(opcode0.opcodeType); + assert(operand0.mask); + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + + emit_dword(emit, operand0.value); + + if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { + /* Next token is the index of the register to declare */ + emit_dword(emit, index); + } + else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { + /* Next token is the size of the register */ + emit_dword(emit, size); + + /* Followed by the index of the register */ + emit_dword(emit, index); + } + + if (name_token.value) { + emit_dword(emit, name_token.value); + } + + end_emit_instruction(emit); +} + + +/** + * Emit the declaration for a shader input. 
+ * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx + * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x + * \param dim index dimension + * \param index the input register index + * \param size array size of the operand. In most cases, it is 1, + * but for inputs to geometry shader, the array size varies + * depending on the primitive type. + * \param name one of VGPU10_NAME_x + * \parma numComp number of components + * \param selMode component selection mode + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + * \param interpMode interpolation mode + */ +static void +emit_input_declaration(struct svga_shader_emitter_v10 *emit, + unsigned opcodeType, unsigned operandType, + unsigned dim, unsigned index, unsigned size, + unsigned name, unsigned numComp, + unsigned selMode, unsigned usageMask, + unsigned interpMode) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || + opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); + assert(operandType == VGPU10_OPERAND_TYPE_INPUT || + operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); + assert(numComp <= VGPU10_OPERAND_4_COMPONENT); + assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); + assert(dim <= VGPU10_OPERAND_INDEX_3D); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_INSTANCE_ID || + name == VGPU10_NAME_VERTEX_ID || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_IS_FRONT_FACE); + assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || + interpMode == VGPU10_INTERPOLATION_CONSTANT || + interpMode == VGPU10_INTERPOLATION_LINEAR || + interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || + interpMode 
== VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID); + + check_register_index(emit, opcodeType, index); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = opcodeType; + opcode0.interpolationMode = interpMode; + + operand0.operandType = operandType; + operand0.numComponents = numComp; + operand0.selectionMode = selMode; + operand0.mask = usageMask; + operand0.indexDimension = dim; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + if (dim == VGPU10_OPERAND_INDEX_2D) + operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); +} + + +/** + * Emit the declaration for a shader output. + * \param type one of VGPU10_OPCODE_DCL_OUTPUTx + * \param index the output register index + * \param name one of VGPU10_NAME_x + * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values + */ +static void +emit_output_declaration(struct svga_shader_emitter_v10 *emit, + unsigned type, unsigned index, + unsigned name, unsigned usageMask) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); + assert(type == VGPU10_OPCODE_DCL_OUTPUT || + type == VGPU10_OPCODE_DCL_OUTPUT_SGV || + type == VGPU10_OPCODE_DCL_OUTPUT_SIV); + assert(name == VGPU10_NAME_UNDEFINED || + name == VGPU10_NAME_POSITION || + name == VGPU10_NAME_PRIMITIVE_ID || + name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || + name == VGPU10_NAME_CLIP_DISTANCE); + + check_register_index(emit, type, index); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = type; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = usageMask; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + 
operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + name_token.name = name; + + emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); +} + + +/** + * Emit the declaration for the fragment depth output. + */ +static void +emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit the declaration for a system value input/output. + */ +static void +emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, + unsigned semantic_name, unsigned index) +{ + switch (semantic_name) { + case TGSI_SEMANTIC_INSTANCEID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_INSTANCE_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + case TGSI_SEMANTIC_VERTEXID: + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_VERTEX_ID, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_UNDEFINED); + break; + default: + ; /* XXX */ + } +} + +/** + * Translate a TGSI declaration to VGPU10. 
+ */ +static boolean +emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_declaration *decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + /* do nothing - see emit_input_declarations() */ + return TRUE; + + case TGSI_FILE_OUTPUT: + assert(decl->Range.First == decl->Range.Last); + emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; + return TRUE; + + case TGSI_FILE_TEMPORARY: + /* Don't declare the temps here. Just keep track of how many + * and emit the declaration later. + */ + if (decl->Declaration.Array) { + /* Indexed temporary array. Save the start index of the array + * and the size of the array. + */ + const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); + unsigned i; + + assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); + + /* Save this array so we can emit the declaration for it later */ + emit->temp_arrays[arrayID].start = decl->Range.First; + emit->temp_arrays[arrayID].size = + decl->Range.Last - decl->Range.First + 1; + + emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); + assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); + emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); + + /* Fill in the temp_map entries for this array */ + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + emit->temp_map[i].arrayId = arrayID; + emit->temp_map[i].index = i - decl->Range.First; + } + } + + /* for all temps, indexed or not, keep track of highest index */ + emit->num_shader_temps = MAX2(emit->num_shader_temps, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_CONSTANT: + /* Don't declare constants here. Just keep track and emit later. */ + { + unsigned constbuf = 0, num_consts; + if (decl->Declaration.Dimension) { + constbuf = decl->Dim.Index2D; + } + /* We throw an assertion here when, in fact, the shader should never + * have linked due to constbuf index out of bounds, so we shouldn't + * have reached here. 
+ */ + assert(constbuf < Elements(emit->num_shader_consts)); + + num_consts = MAX2(emit->num_shader_consts[constbuf], + decl->Range.Last + 1); + + if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: constant buffer is declared to size [%u]" + " but [%u] is the limit.\n", + num_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + /* The linker doesn't enforce the max UBO size so we clamp here */ + emit->num_shader_consts[constbuf] = + MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + } + return TRUE; + + case TGSI_FILE_IMMEDIATE: + assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); + return FALSE; + + case TGSI_FILE_SYSTEM_VALUE: + emit_system_value_declaration(emit, decl->Semantic.Name, + decl->Range.First); + return TRUE; + + case TGSI_FILE_SAMPLER: + /* Don't declare samplers here. Just keep track and emit later. */ + emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_RESOURCE: + /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ + /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ + assert(!"TGSI_FILE_RESOURCE not handled yet"); + return FALSE; + + case TGSI_FILE_ADDRESS: + emit->num_address_regs = MAX2(emit->num_address_regs, + decl->Range.Last + 1); + return TRUE; + + case TGSI_FILE_SAMPLER_VIEW: + /* Not used at this time, but maybe in the future. + * See emit_resource_declarations(). + */ + return TRUE; + + default: + assert(!"Unexpected type of declaration"); + return FALSE; + } +} + + + +/** + * Emit all input declarations. 
+ */
+static boolean
+emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+   /* One branch per shader stage.  In every branch, inputs whose usage
+    * mask is zero are skipped.  Fragment shaders walk the linked inputs
+    * and choose opcode, system-value name and interpolation mode from the
+    * semantic name.  Geometry shaders declare 2D-indexed inputs sized by
+    * gs.input_size (the primitive ID is the 0D exception).  Vertex shaders
+    * declare plain 1D inputs.  The function always returns TRUE.
+    */
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+
+      for (i = 0; i < emit->linkage.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned type, interpolationMode, name;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* fragment position input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
+            name = VGPU10_NAME_POSITION;
+            if (usage_mask & TGSI_WRITEMASK_W) {
+               /* we need to replace use of 'w' with '1/w' */
+               emit->fs.fragcoord_input_index = i;
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_FACE) {
+            /* fragment front-facing input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_IS_FRONT_FACE;
+            emit->fs.face_input_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* primitive ID */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+         }
+         else {
+            /* general fragment input */
+            type = VGPU10_OPCODE_DCL_INPUT_PS;
+            interpolationMode =
+               translate_interpolation(emit,
+                                       emit->info.input_interpolate[i],
+                                       emit->info.input_interpolate_loc[i]);
+
+            /* keeps track if flat interpolation mode is being used */
+            emit->uses_flat_interp = emit->uses_flat_interp ||
+               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
+
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_input_declaration(emit, type,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                name,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                interpolationMode);
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned semantic_name = emit->info.input_semantic_name[i];
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = emit->linkage.input_map[i];
+         unsigned opcodeType, operandType;
+         unsigned numComp, selMode;
+         unsigned name;
+         unsigned dim;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         opcodeType = VGPU10_OPCODE_DCL_INPUT;
+         operandType = VGPU10_OPERAND_TYPE_INPUT;
+         numComp = VGPU10_OPERAND_4_COMPONENT;
+         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+         name = VGPU10_NAME_UNDEFINED;
+
+         /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+         dim = VGPU10_OPERAND_INDEX_2D;
+
+         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+            /* Primitive ID */
+            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+            dim = VGPU10_OPERAND_INDEX_0D;
+            numComp = VGPU10_OPERAND_0_COMPONENT;
+            selMode = 0;
+
+            /* Also save the register index so we can check for the
+             * primitive id when emitting a src register.  We need to modify
+             * the operand type and index dimension when emitting the
+             * primitive id src reg.
+             */
+            emit->gs.prim_id_index = i;
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* vertex position input */
+            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+         }
+
+         emit_input_declaration(emit, opcodeType, operandType,
+                                dim, index,
+                                emit->gs.input_size,
+                                name,
+                                numComp, selMode,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+   else {
+      assert(emit->unit == PIPE_SHADER_VERTEX);
+
+      for (i = 0; i < emit->info.num_inputs; i++) {
+         unsigned usage_mask = emit->info.input_usage_mask[i];
+         unsigned index = i;
+
+         if (usage_mask == 0)
+            continue;  /* register is not actually used */
+
+         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+                                VGPU10_OPERAND_TYPE_INPUT,
+                                VGPU10_OPERAND_INDEX_1D, index, 1,
+                                VGPU10_NAME_UNDEFINED,
+                                VGPU10_OPERAND_4_COMPONENT,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+                                VGPU10_INTERPOLATION_UNDEFINED);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit all output declarations.
+ *
+ * Fragment shaders: each COLOR output is declared at its semantic index
+ * and recorded in fs.color_out_index[]; when color 0 is broadcast to
+ * several color buffers, extra outputs are declared for buffers 1..n-1.
+ * A POSITION output is the fragment depth.
+ *
+ * Vertex/geometry shaders: the declaration opcode and system-value name
+ * are chosen from the semantic name.  The indexes of the position, clip
+ * distance and clip vertex outputs are saved in the emitter.  A clip
+ * distance declaration whose write mask becomes zero after
+ * apply_clip_plane_mask() is dropped.
+ *
+ * Finally, the shadow copies of the vertex position and the clip
+ * distances are declared when both of their indexes are valid.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned i;
+
+   for (i = 0; i < emit->info.num_outputs; i++) {
+      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+      const unsigned semantic_name = emit->info.output_semantic_name[i];
+      const unsigned semantic_index = emit->info.output_semantic_index[i];
+      unsigned index = i;
+
+      if (emit->unit == PIPE_SHADER_FRAGMENT) {
+         if (semantic_name == TGSI_SEMANTIC_COLOR) {
+            assert(semantic_index < Elements(emit->fs.color_out_index));
+
+            emit->fs.color_out_index[semantic_index] = index;
+
+            /* The semantic index is the shader's color output/buffer index */
+            emit_output_declaration(emit,
+                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
+                                    VGPU10_NAME_UNDEFINED,
+                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+
+            if (semantic_index == 0) {
+               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+                  /* Emit declarations for the additional color outputs
+                   * for broadcasting.
+                   */
+                  unsigned j;
+                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
+                     /* Allocate a new output index */
+                     unsigned idx = emit->info.num_outputs + j - 1;
+                     emit->fs.color_out_index[j] = idx;
+                     emit_output_declaration(emit,
+                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
+                                        VGPU10_NAME_UNDEFINED,
+                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+                     emit->info.output_semantic_index[idx] = j;
+                  }
+               }
+            }
+            else {
+               assert(!emit->key.fs.write_color0_to_n_cbufs);
+            }
+         }
+         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+            /* Fragment depth output */
+            emit_fragdepth_output_declaration(emit);
+         }
+         else {
+            assert(!"Bad output semantic name");
+         }
+      }
+      else {
+         /* VS or GS */
+         unsigned name, type;
+         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+         switch (semantic_name) {
+         case TGSI_SEMANTIC_POSITION:
+            assert(emit->unit != PIPE_SHADER_FRAGMENT);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_POSITION;
+            /* Save the index of the vertex position output register */
+            emit->vposition.out_index = index;
+            break;
+         case TGSI_SEMANTIC_CLIPDIST:
+            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+            name = VGPU10_NAME_CLIP_DISTANCE;
+            /* save the starting index of the clip distance output register */
+            if (semantic_index == 0)
+               emit->clip_dist_out_index = index;
+            writemask = emit->output_usage_mask[index];
+            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
+            if (writemask == 0x0) {
+               continue;  /* discard this do-nothing declaration */
+            }
+            break;
+         case TGSI_SEMANTIC_PRIMID:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_PRIMITIVE_ID;
+            break;
+         case TGSI_SEMANTIC_LAYER:
+            assert(emit->unit == PIPE_SHADER_GEOMETRY);
+            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
+            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
+            break;
+         case TGSI_SEMANTIC_CLIPVERTEX:
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+            emit->clip_vertex_out_index = index;
+            break;
+         default:
+            /* generic output */
+            type = VGPU10_OPCODE_DCL_OUTPUT;
+            name = VGPU10_NAME_UNDEFINED;
+         }
+
+         emit_output_declaration(emit, type, index, name, writemask);
+      }
+   }
+
+   if (emit->vposition.so_index != INVALID_INDEX &&
+       emit->vposition.out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the non-adjusted vertex position
+       * for stream output purpose
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->vposition.so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+   }
+
+   if (emit->clip_dist_so_index != INVALID_INDEX &&
+       emit->clip_dist_out_index != INVALID_INDEX) {
+
+      assert(emit->unit != PIPE_SHADER_FRAGMENT);
+
+      /* Emit the declaration for the clip distance shadow copy which
+       * will be used for stream output purpose and for clip distance
+       * varying variable
+       */
+      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                              emit->clip_dist_so_index,
+                              VGPU10_NAME_UNDEFINED,
+                              emit->output_usage_mask[emit->clip_dist_out_index]);
+
+      if (emit->info.num_written_clipdistance > 4) {
+         /* for the second clip distance register, each handles 4 planes */
+         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
+                                 emit->clip_dist_so_index + 1,
+                                 VGPU10_NAME_UNDEFINED,
+                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the declaration for the temporary registers.
+ *
+ * Starting from the number of temps the shader declared, this reserves
+ * MAX_INTERNAL_TEMPS scratch temps plus any stage-specific extra temps
+ * (vertex position, adjusted vertex attributes, clip distance / clip
+ * vertex, output color, face, fragcoord, address registers) and records
+ * the index of each in the emitter.  Non-array temps are then renumbered
+ * consecutively through temp_map[], and the DCL_TEMPS and
+ * DCL_INDEXABLE_TEMP declarations are emitted.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned total_temps, reg, i;
+
+   total_temps = emit->num_shader_temps;
+
+   /* Allocate extra temps for specially-implemented instructions,
+    * such as LIT.
+    */
+   total_temps += MAX_INTERNAL_TEMPS;
+
+   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
+      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
+          emit->key.clip_plane_enable ||
+          emit->vposition.so_index != INVALID_INDEX) {
+         emit->vposition.tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->unit == PIPE_SHADER_VERTEX) {
+         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
+                                 emit->key.vs.adjust_attrib_itof |
+                                 emit->key.vs.adjust_attrib_utof |
+                                 emit->key.vs.attrib_is_bgra |
+                                 emit->key.vs.attrib_puint_to_snorm |
+                                 emit->key.vs.attrib_puint_to_uscaled |
+                                 emit->key.vs.attrib_puint_to_sscaled);
+         while (attrib_mask) {
+            unsigned index = u_bit_scan(&attrib_mask);
+            emit->vs.adjusted_input[index] = total_temps++;
+         }
+      }
+
+      if (emit->clip_mode == CLIP_DISTANCE) {
+         /* We need to write the clip distance to a temporary register
+          * first. Then it will be copied to the shadow copy for
+          * the clip distance varying variable and stream output purpose.
+          * It will also be copied to the actual CLIPDIST register
+          * according to the enabled clip planes
+          */
+         emit->clip_dist_tmp_index = total_temps++;
+         if (emit->info.num_written_clipdistance > 4)
+            total_temps++; /* second clip register */
+      }
+      else if (emit->clip_mode == CLIP_VERTEX) {
+         /* We need to convert the TGSI CLIPVERTEX output to one or more
+          * clip distances.  Allocate a temp reg for the clipvertex here.
+          */
+         assert(emit->info.writes_clipvertex > 0);
+         emit->clip_vertex_tmp_index = total_temps;
+         total_temps++;
+      }
+   }
+   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+          emit->key.fs.write_color0_to_n_cbufs > 1) {
+         /* Allocate a temp to hold the output color */
+         emit->fs.color_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.face_input_index != INVALID_INDEX) {
+         /* Allocate a temp for the +/-1 face register */
+         emit->fs.face_tmp_index = total_temps;
+         total_temps += 1;
+      }
+
+      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+         /* Allocate a temp for modified fragment position register */
+         emit->fs.fragcoord_tmp_index = total_temps;
+         total_temps += 1;
+      }
+   }
+
+   for (i = 0; i < emit->num_address_regs; i++) {
+      emit->address_reg_index[i] = total_temps++;
+   }
+
+   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
+    * temp indexes.  Basically, we compact all the non-array temp register
+    * indexes into a consecutive series.
+    *
+    * Before, we may have some TGSI declarations like:
+    *   DCL TEMP[0..1], LOCAL
+    *   DCL TEMP[2..4], ARRAY(1), LOCAL
+    *   DCL TEMP[5..7], ARRAY(2), LOCAL
+    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
+    *
+    * After, we'll have a map like this:
+    *   temp_map[0] = { array 0, index 0 }
+    *   temp_map[1] = { array 0, index 1 }
+    *   temp_map[2] = { array 1, index 0 }
+    *   temp_map[3] = { array 1, index 1 }
+    *   temp_map[4] = { array 1, index 2 }
+    *   temp_map[5] = { array 2, index 0 }
+    *   temp_map[6] = { array 2, index 1 }
+    *   temp_map[7] = { array 2, index 2 }
+    *   temp_map[8] = { array 0, index 2 }
+    *   temp_map[9] = { array 0, index 3 }
+    *
+    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
+    * temps numbered 0..3
+    *
+    * Any time we emit a temporary register index, we'll have to use the
+    * temp_map[] table to convert the TGSI index to the VGPU10 index.
+    *
+    * Finally, we recompute the total_temps value here.
+    */
+   reg = 0;
+   for (i = 0; i < total_temps; i++) {
+      if (emit->temp_map[i].arrayId == 0) {
+         emit->temp_map[i].index = reg++;
+      }
+   }
+   total_temps = reg;
+
+   if (0) {
+      debug_printf("total_temps %u\n", total_temps);
+      for (i = 0; i < 30; i++) {
+         debug_printf("temp %u -> array %u index %u\n",
+                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
+      }
+   }
+
+   /* Emit declaration of ordinary temp registers */
+   if (total_temps > 0) {
+      VGPU10OpcodeToken0 opcode0;
+
+      opcode0.value = 0;
+      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
+
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, total_temps);
+      end_emit_instruction(emit);
+   }
+
+   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
+    * it's unused.
+    */
+   for (i = 1; i < emit->num_temp_arrays; i++) {
+      unsigned num_temps = emit->temp_arrays[i].size;
+
+      if (num_temps > 0) {
+         VGPU10OpcodeToken0 opcode0;
+
+         opcode0.value = 0;
+         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
+
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, i); /* which array */
+         emit_dword(emit, num_temps);
+         emit_dword(emit, 4); /* num components */
+         end_emit_instruction(emit);
+
+         total_temps += num_temps;
+      }
+   }
+
+   /* Check that the grand total of all regular and indexed temps is
+    * under the limit.
+    */
+   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
+
+   return TRUE;
+}
+
+/**
+ * Emit the constant buffer declarations.
+ *
+ * Constant buffer 0 holds the shader's own constants followed by the
+ * "extra" constants the translated code needs (position prescale,
+ * viewport, user clip planes, texcoord scale factors, texture buffer
+ * sizes).  The slot assigned to each extra constant is recorded in the
+ * emitter.  Every other constant buffer with a non-zero tracked size is
+ * declared with that size.
+ *
+ * \return TRUE always
+ */
+static boolean
+emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   unsigned total_consts, i;
+
+   opcode0.value = 0;
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
+   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
+   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
+
+   operand0.value = 0;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
+   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = 0;
+   operand0.swizzleY = 1;
+   operand0.swizzleZ = 2;
+   operand0.swizzleW = 3;
+
+   /**
+    * Emit declaration for constant buffer [0].  We also allocate
+    * room for the extra constants here.
+    */
+   total_consts = emit->num_shader_consts[0];
+
+   /* Now, allocate constant slots for the "extra" constants */
+
+   /* Vertex position scale/translation */
+   if (emit->vposition.need_prescale) {
+      emit->vposition.prescale_scale_index = total_consts++;
+      emit->vposition.prescale_trans_index = total_consts++;
+   }
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      if (emit->key.vs.undo_viewport) {
+         emit->vs.viewport_index = total_consts++;
+      }
+   }
+
+   /* user-defined clip planes */
+   if (emit->key.clip_plane_enable) {
+      unsigned n = util_bitcount(emit->key.clip_plane_enable);
+      assert(emit->unit == PIPE_SHADER_VERTEX ||
+             emit->unit == PIPE_SHADER_GEOMETRY);
+      for (i = 0; i < n; i++) {
+         emit->clip_plane_const[i] = total_consts++;
+      }
+   }
+
+   /* Texcoord scale factors for RECT textures */
+   {
+      for (i = 0; i < emit->num_samplers; i++) {
+         if (emit->key.tex[i].unnormalized) {
+            emit->texcoord_scale_index[i] = total_consts++;
+         }
+      }
+   }
+
+   /* Texture buffer sizes */
+   for (i = 0; i < emit->num_samplers; i++) {
+      if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+         emit->texture_buffer_size_index[i] = total_consts++;
+      }
+   }
+
+   if (total_consts > 0) {
+      begin_emit_instruction(emit);
+      emit_dword(emit, opcode0.value);
+      emit_dword(emit, operand0.value);
+      emit_dword(emit, 0);  /* which const buffer slot */
+      emit_dword(emit, total_consts);
+      end_emit_instruction(emit);
+   }
+
+   /* Declare remaining constant buffers (UBOs) */
+   for (i = 1; i < Elements(emit->num_shader_consts); i++) {
+      if (emit->num_shader_consts[i] > 0) {
+         begin_emit_instruction(emit);
+         emit_dword(emit, opcode0.value);
+         emit_dword(emit, operand0.value);
+         emit_dword(emit, i);  /* which const buffer slot */
+         emit_dword(emit, emit->num_shader_consts[i]);
+         end_emit_instruction(emit);
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit declarations for samplers.
+ */ +static boolean +emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; + opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + end_emit_instruction(emit); + } + + return TRUE; +} + + +/** + * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. + */ +static unsigned +pipe_texture_to_resource_dimension(unsigned target, bool msaa) +{ + switch (target) { + case PIPE_BUFFER: + return VGPU10_RESOURCE_DIMENSION_BUFFER; + case PIPE_TEXTURE_1D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS + : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case PIPE_TEXTURE_3D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; + case PIPE_TEXTURE_CUBE: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + case PIPE_TEXTURE_1D_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; + case PIPE_TEXTURE_2D_ARRAY: + return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; + case PIPE_TEXTURE_CUBE_ARRAY: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; + default: + assert(!"Unexpected resource type"); + return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + } +} + + +/** + * Given a tgsi_return_type, return true iff it is an integer type. 
+ */ +static boolean +is_integer_type(enum tgsi_return_type type) +{ + switch (type) { + case TGSI_RETURN_TYPE_SINT: + case TGSI_RETURN_TYPE_UINT: + return TRUE; + case TGSI_RETURN_TYPE_FLOAT: + case TGSI_RETURN_TYPE_UNORM: + case TGSI_RETURN_TYPE_SNORM: + return FALSE; + case TGSI_RETURN_TYPE_COUNT: + default: + assert(!"is_integer_type: Unknown tgsi_return_type"); + return FALSE; + } +} + + +/** + * Emit declarations for resources. + * XXX When we're sure that all TGSI shaders will be generated with + * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may + * rework this code. + */ +static boolean +emit_resource_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + + /* Emit resource decl for each sampler */ + for (i = 0; i < emit->num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10ResourceReturnTypeToken return_type; + VGPU10_RESOURCE_RETURN_TYPE rt; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; + opcode0.resourceDimension = + pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, + emit->key.tex[i].texture_msaa); + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + +#if 1 + /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ + STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); + STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); + assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); + rt = emit->key.tex[i].return_type + 1; +#else + switch (emit->key.tex[i].return_type) { + case 
TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; + case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; + case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; + case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; + case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; + case TGSI_RETURN_TYPE_COUNT: + default: + rt = VGPU10_RETURN_TYPE_FLOAT; + assert(!"emit_resource_declarations: Unknown tgsi_return_type"); + } +#endif + + return_type.value = 0; + return_type.component0 = rt; + return_type.component1 = rt; + return_type.component2 = rt; + return_type.component3 = rt; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); + emit_dword(emit, return_type.value); + end_emit_instruction(emit); + } + + return TRUE; +} + +static void +emit_instruction_op1(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src); + end_emit_instruction(emit); +} + +static void +emit_instruction_op2(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + boolean saturate) +{ + begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + end_emit_instruction(emit); +} + +static void +emit_instruction_op3(struct svga_shader_emitter_v10 *emit, + unsigned opcode, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src1, + const struct tgsi_full_src_register *src2, + const struct tgsi_full_src_register *src3, + boolean saturate) +{ + 
begin_emit_instruction(emit); + emit_opcode(emit, opcode, saturate); + emit_dst_register(emit, dst); + emit_src_register(emit, src1); + emit_src_register(emit, src2); + emit_src_register(emit, src3); + end_emit_instruction(emit); +} + +/** + * Emit the actual clip distance instructions to be used for clipping + * by copying the clip distance from the temporary registers to the + * CLIPDIST registers written with the enabled planes mask. + * Also copy the clip distance from the temporary to the clip distance + * shadow copy register which will be referenced by the input shader + */ +static void +emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) +{ + struct tgsi_full_src_register tmp_clip_dist_src; + struct tgsi_full_dst_register clip_dist_dst; + + unsigned i; + unsigned clip_plane_enable = emit->key.clip_plane_enable; + unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; + unsigned num_written_clipdist = emit->info.num_written_clipdistance; + + assert(emit->clip_dist_out_index != INVALID_INDEX); + assert(emit->clip_dist_tmp_index != INVALID_INDEX); + + /** + * Temporary reset the temporary clip dist register index so + * that the copy to the real clip dist register will not + * attempt to copy to the temporary register again + */ + emit->clip_dist_tmp_index = INVALID_INDEX; + + for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) { + + tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); + + /** + * copy to the shadow copy for use by varying variable and + * stream output. All clip distances + * will be written regardless of the enabled clipping planes. 
+ */ + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_so_index + i); + + /* MOV clip_dist_so, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + + /** + * copy those clip distances to enabled clipping planes + * to CLIPDIST registers for clipping + */ + if (clip_plane_enable & 0xf) { + clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, + emit->clip_dist_out_index + i); + clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); + + /* MOV CLIPDIST, tmp_clip_dist */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, + &tmp_clip_dist_src, FALSE); + } + /* four clip planes per clip register */ + clip_plane_enable >>= 4; + } + /** + * set the temporary clip dist register index back to the + * temporary index for the next vertex + */ + emit->clip_dist_tmp_index = clip_dist_tmp_index; +} + +/* Declare clip distance output registers for user-defined clip planes + * or the TGSI_CLIPVERTEX output. + */ +static void +emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); + unsigned index = emit->num_outputs; + unsigned plane_mask; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + assert(num_clip_planes <= 8); + + if (emit->clip_mode != CLIP_LEGACY && + emit->clip_mode != CLIP_VERTEX) { + return; + } + + if (num_clip_planes == 0) + return; + + /* Declare one or two clip output registers. The number of components + * in the mask reflects the number of clip planes. 
For example, if 5 + * clip planes are needed, we'll declare outputs similar to: + * dcl_output_siv o2.xyzw, clip_distance + * dcl_output_siv o3.x, clip_distance + */ + emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ + + plane_mask = (1 << num_clip_planes) - 1; + if (plane_mask & 0xf) { + unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } + if (plane_mask & 0xf0) { + unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, + VGPU10_NAME_CLIP_DISTANCE, cmask); + emit->num_outputs++; + } +} + + +/** + * Emit the instructions for writing to the clip distance registers + * to handle legacy/automatic clip planes. + * For each clip plane, the distance is the dot product of the vertex + * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. + * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE + * output registers already declared. 
+ */
+static void
+emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
+                             unsigned vpos_tmp_index)
+{
+   const unsigned plane_count = util_bitcount(emit->key.clip_plane_enable);
+   unsigned plane;
+
+   assert(emit->clip_mode == CLIP_LEGACY);
+   assert(plane_count <= 8);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX ||
+          emit->unit == PIPE_SHADER_GEOMETRY);
+
+   for (plane = 0; plane < plane_count; plane++) {
+      /* Four planes share one output register, one plane per channel. */
+      const unsigned out_reg = emit->clip_dist_out_index + plane / 4;
+      const unsigned channel_mask =
+         VGPU10_OPERAND_4_COMPONENT_MASK_X << (plane % 4);
+
+      /* destination: the single channel of the clip distance output */
+      struct tgsi_full_dst_register whole_dst =
+         make_dst_reg(TGSI_FILE_OUTPUT, out_reg);
+      struct tgsi_full_dst_register dist_dst =
+         writemask_dst(&whole_dst, channel_mask);
+
+      /* sources: the plane's constant and the vertex position temp */
+      struct tgsi_full_src_register plane_src =
+         make_src_const_reg(emit->clip_plane_const[plane]);
+      struct tgsi_full_src_register vpos_src =
+         make_src_temp_reg(vpos_tmp_index);
+
+      /* DP4 clip_dist, plane, vpos */
+      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dist_dst,
+                           &plane_src, &vpos_src, FALSE);
+   }
+}
+
+
+/**
+ * Emit the instructions for computing the clip distance results from
+ * the clip vertex temporary.
+ * For each clip plane, the distance is the dot product of the clip vertex
+ * position (found in a temp reg) and the clip plane coefficients.
+ */ +static void +emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) +{ + const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); + unsigned i; + struct tgsi_full_dst_register dst; + struct tgsi_full_src_register clipvert_src; + const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; + + assert(emit->unit == PIPE_SHADER_VERTEX || + emit->unit == PIPE_SHADER_GEOMETRY); + + assert(emit->clip_mode == CLIP_VERTEX); + + clipvert_src = make_src_temp_reg(clip_vertex_tmp); + + for (i = 0; i < num_clip; i++) { + struct tgsi_full_src_register plane_src; + unsigned reg_index = emit->clip_dist_out_index + i / 4; + unsigned comp = i % 4; + unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; + + /* create dst, src regs */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); + dst = writemask_dst(&dst, writemask); + + plane_src = make_src_const_reg(emit->clip_plane_const[i]); + + /* DP4 clip_dist, plane, vpos */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, + &plane_src, &clipvert_src, FALSE); + } + + /* copy temporary clip vertex register to the clip vertex register */ + + assert(emit->clip_vertex_out_index != INVALID_INDEX); + + /** + * temporary reset the temporary clip vertex register index so + * that copy to the clip vertex register will not attempt + * to copy to the temporary register again + */ + emit->clip_vertex_tmp_index = INVALID_INDEX; + + /* MOV clip_vertex, clip_vertex_tmp */ + dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &clipvert_src, FALSE); + + /** + * set the temporary clip vertex register index back to the + * temporary index for the next vertex + */ + emit->clip_vertex_tmp_index = clip_vertex_tmp; +} + +/** + * Emit code to convert RGBA to BGRA + */ +static void +emit_swap_r_b(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct 
tgsi_full_src_register bgra_src = + swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, dst); + emit_src_register(emit, &bgra_src); + end_emit_instruction(emit); +} + + +/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ +static void +emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); + struct tgsi_full_src_register two = + make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); + struct tgsi_full_src_register neg_two = + make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); + + unsigned val_tmp = get_temp_index(emit); + struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); + struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); + + unsigned bias_tmp = get_temp_index(emit); + struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); + struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); + + /* val = src * 2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, + src, &two, FALSE); + + /* bias = src > 0.5 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, + src, &half, FALSE); + + /* bias = bias & -2.0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, + &bias_src, &neg_two, FALSE); + + /* dst = val + bias */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, + &val_src, &bias_src, FALSE); + + free_temp_indexes(emit); +} + + +/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ +static void +emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register scale = + make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 
3.0f); + + /* dst = src * (1023, 1023, 1023, 3) */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); +} + + +/** + * Convert from R32_UINT to 10_10_10_2_sscaled. + * The packed value is taken from the X swizzle term of src and is + * unpacked with one ISHL and one ISHR (per-channel shift counts), then + * converted to float with ITOF. Uses one internal temporary. + */ +static void +emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src) +{ + struct tgsi_full_src_register lshift = + make_immediate_reg_int4(emit, 22, 12, 2, 0); + struct tgsi_full_src_register rshift = + make_immediate_reg_int4(emit, 22, 22, 22, 30); + + struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + + /* + * r = (pixel << 22) >> 22; # signed int in [-512, 511] + * g = (pixel << 12) >> 22; # signed int in [-512, 511] + * b = (pixel << 2) >> 22; # signed int in [-512, 511] + * a = (pixel << 0) >> 30; # signed int in [-2, 1] + * dst = i_to_f(r,g,b,a); # convert to float + */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, + &src_xxxx, &lshift, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, + &tmp_src, &rshift, FALSE); + emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); + + free_temp_indexes(emit); +} + + +/** + * Emit code for TGSI_OPCODE_ABS instruction. + * Always returns TRUE. + */ +static boolean +emit_abs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = ABS(s0): + * dst = abs(s0) + * Translates into: + * MOV dst, abs(s0) + * i.e. the absolute value is applied as a source operand modifier. + */ + struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]); + + /* MOV dst, abs(s0) */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &abs_src0, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
+ */ +static boolean +emit_arl_uarl(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned index = inst->Dst[0].Register.Index; + struct tgsi_full_dst_register dst; + unsigned opcode; + + assert(index < MAX_VGPU10_ADDR_REGS); + dst = make_dst_temp_reg(emit->address_reg_index[index]); + + /* ARL dst, s0 + * Translates into: + * FTOI address_tmp, s0 + * + * UARL dst, s0 + * Translates into: + * MOV address_tmp, s0 + * + * The address register is backed by the temporary register whose + * index is stored in emit->address_reg_index[]. + * NOTE(review): TGSI documents ARL as floor(); confirm that FTOI's + * rounding is acceptable for negative, non-integer inputs. + */ + if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) + opcode = VGPU10_OPCODE_FTOI; + else + opcode = VGPU10_OPCODE_MOV; + + emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CAL instruction. + * + * The instruction consists of the CALL opcode token followed by a label + * operand token and the label value taken from inst->Label.Label. + * Always returns TRUE. + */ +static boolean +emit_cal(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned label = inst->Label.Label; + VGPU10OperandToken0 operand; + operand.value = 0; + operand.operandType = VGPU10_OPERAND_TYPE_LABEL; + + begin_emit_instruction(emit); + /* The opcode token must come first; without it the label operand + * token would be the first dword of the instruction. + */ + emit_opcode(emit, VGPU10_OPCODE_CALL, FALSE); + emit_dword(emit, operand.value); + emit_dword(emit, label); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IABS instruction. + * Always returns TRUE. + */ +static boolean +emit_iabs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? -src0.x : src0.x + * dst.y = (src0.y < 0) ? -src0.y : src0.y + * dst.z = (src0.z < 0) ? -src0.z : src0.z + * dst.w = (src0.w < 0) ? -src0.w : src0.w + * + * Translates into + * IMAX dst, src, neg(src) + */ + struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); + emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], + &inst->Src[0], &neg_src, FALSE); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_CMP instruction. + */ +static boolean +emit_cmp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src0.x < 0) ? src1.x : src2.x + * dst.y = (src0.y < 0) ? 
src1.y : src2.y + * dst.z = (src0.z < 0) ? src1.z : src2.z + * dst.w = (src0.w < 0) ? src1.w : src2.w + * + * Translates into + * LT tmp, src0, 0.0 + * MOVC dst, tmp, src1, src2 + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* LT tmp, src0, 0.0 */ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, + &inst->Src[0], &zero, FALSE); + /* MOVC dst, tmp, src1, src2 */ emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], + &tmp_src, &inst->Src[1], &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DP2A instruction: a two-component dot + * product plus a scalar addend, with the result replicated to every + * enabled destination channel. Always returns TRUE. + */ +static boolean +emit_dp2a(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x + * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x + * Translate into + * MAD tmp.x, s0.y, s1.y, s2.x + * MAD tmp.x, s0.x, s1.x, tmp.x + * MOV dst.xyzw, tmp.xxxx + * Only the final MOV carries the instruction's saturate flag. + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + struct tgsi_full_src_register src0_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src1_xxxx = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src2_xxxx = + scalar_src(&inst->Src[2], 
TGSI_SWIZZLE_X); + + /* MAD tmp.x, s0.y, s1.y, s2.x */ emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, + &src1_yyyy, &src2_xxxx, FALSE); + /* MAD tmp.x, s0.x, s1.x, tmp.x */ emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, + &src1_xxxx, &tmp_src_xxxx, FALSE); + /* MOV dst, tmp.xxxx */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DPH instruction. + * Always returns TRUE. + */ +static boolean +emit_dph(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst = s0.xyz . s1.xyz + s1.w, emitted as: + * DP3 tmp, s0, s1 + * ADD dst, tmp, s1.wwww + */ + + struct tgsi_full_src_register s1_wwww = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, + TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* DP3 tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* ADD dst, tmp, s1.wwww (saturate, if any, is applied here only) */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, + &s1_wwww, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_DST instruction. + * Each enabled channel is written to a staging temporary, which is + * then copied to the real destination with one MOV. + */ +static boolean +emit_dst(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 1 + * dst.y = src0.y * src1.y + * dst.z = src0.z + * dst.w = src1.w + */ + + struct tgsi_full_src_register s0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s0_zzzz = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); + struct tgsi_full_src_register s1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s1_wwww = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); + + /* + * If dst and either src0 and src1 are the same we need + * to create a temporary for it and insert a extra move. 
+ */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* MOV tmp_move.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MUL tmp_move.y, s0.y, s1.y */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, + &s1_yyyy, inst->Instruction.Saturate); + } + + /* MOV tmp_move.z, s0.z */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, + inst->Instruction.Saturate); + } + + /* MOV tmp_move.w, s1.w */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, + inst->Instruction.Saturate); + } + + /* MOV dst, tmp_move: copy the staged result to the real destination */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + + +/** + * Emit code for TGSI_OPCODE_ENDPRIM (GS only). + * Emits a bare VGPU10 CUT instruction with no operands. + */ +static boolean +emit_endprim(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->unit == PIPE_SHADER_GEOMETRY); + + /* We can't use emit_simple() because the TGSI instruction has one + * operand (vertex stream number) which we must ignore for VGPU10. 
+ */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + end_emit_instruction(emit); + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. + * Always returns TRUE. + */ +static boolean +emit_ex2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_EX2 computes only one value from src.x + * while VGPU10 computes four values. + * + * dst = EX2(src): + * dst.xyzw = 2.0 ^ src.x + * + * The source is therefore replicated (src.xxxx) so every channel of + * the VGPU10 EXP sees the same input. + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* EXP dst, s0.xxxx (written directly to dst; no temporary is used) */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_EXP instruction. + * Uses two internal temporaries: one for floor(s0.x) and one that stages + * the result before it is copied to the destination. + */ +static boolean +emit_exp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = 2 ^ floor(s0.x) + * dst.y = s0.x - floor(s0.x) + * dst.z = 2 ^ s0.x + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert a extra move. 
+ */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* ROUND_NI tmp.x, s0.x (tmp.x = floor(s0.x)) */ + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &src_xxxx, FALSE); /* round to -infinity */ + + /* EXP tmp_move.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* ADD tmp_move.y, s0.x, -tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, + &neg_tmp_src, inst->Instruction.Saturate); + } + + /* EXP tmp_move.z, s0.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, + inst->Instruction.Saturate); + } + + /* MOV tmp_move.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, + FALSE); + } + + /* MOV dst, tmp_move: copy the staged result to the real destination */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_IF instruction. 
+ */ +static boolean +emit_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + VGPU10OpcodeToken0 opcode0; + + /* The src register should be a scalar, i.e. all four swizzle terms + * select the same component. + */ + assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && + inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); + + /* The only special thing here is that we need to set the + * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if + * src.x is non-zero. The opcode token is therefore built by hand + * and written with emit_dword() rather than emit_opcode(). + */ + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_IF; + opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of + * the register components are negative). + * Uses one internal temporary to hold the per-component comparison. + */ +static boolean +emit_kill_if(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* tmp = src[0] < 0.0 (per component) */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &zero, FALSE); + + if (!same_swizzle_terms(&inst->Src[0])) { + /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to + * logically OR the swizzle terms. Most uses of KILL_IF only + * test one channel so it's good to avoid these extra steps. 
+ */ + struct tgsi_full_src_register tmp_src_yyyy = + scalar_src(&tmp_src, TGSI_SWIZZLE_Y); + struct tgsi_full_src_register tmp_src_zzzz = + scalar_src(&tmp_src, TGSI_SWIZZLE_Z); + struct tgsi_full_src_register tmp_src_wwww = + scalar_src(&tmp_src, TGSI_SWIZZLE_W); + + /* tmp.x = tmp.x | tmp.y | tmp.z | tmp.w, accumulated in three ORs */ emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, + &tmp_src_wwww, FALSE); + } + + begin_emit_instruction(emit); + emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */ + emit_src_register(emit, &tmp_src_xxxx); + end_emit_instruction(emit); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). + * The discard is made unconditional by testing a 0.0 immediate for + * zero. Always returns TRUE. + */ +static boolean +emit_kill(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + /* DISCARD if 0.0 is zero, i.e. always */ + begin_emit_instruction(emit); + emit_discard_opcode(emit, FALSE); + emit_src_register(emit, &zero); + end_emit_instruction(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LG2 instruction. + * Always returns TRUE. + */ +static boolean +emit_lg2(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_LG2 computes only one value from src.x + * while VGPU10 computes four values. + * + * dst = LG2(src): + * dst.xyzw = log2(src.x) + * + * The source is therefore replicated (src.xxxx) so every channel of + * the VGPU10 LOG sees the same input. + */ + + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG dst, s0.xxxx (written directly to dst; no temporary is used) */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LIT instruction. 
+ */ +static boolean +emit_lit(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* + * If dst and src are the same we need to create + * a temporary for it and insert an extra move: every channel is + * first written to tmp_move and copied to dst at the very end. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* + * dst.x = 1 + * dst.y = max(src.x, 0) + * dst.z = (src.x > 0) ? max(src.y, 0) ^ clamp(src.w, -128, 128) : 0 + * dst.w = 1 + */ + + /* MOV tmp_move.x, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); + } + + /* MOV tmp_move.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + /* MAX tmp_move.y, src.x, 0.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register src_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, + &zero, inst->Instruction.Saturate); + } + + /* + * tmp1 = clamp(src.w, -128, 128); + * MAX tmp1, src.w, -128 + * MIN tmp1, tmp1, 128 + * + * tmp2 = max(tmp2, 0); + * MAX tmp2, src.y, 0 + * + * tmp1 = pow(tmp2, tmp1); + * LOG tmp2, tmp2 + * MUL tmp1, tmp2, tmp1 + * EXP tmp1, tmp1 + * + * tmp1 = (src.w == 0) ? 1 : tmp1; + * EQ tmp2, 0, src.w + * MOVC tmp1, tmp2, 1.0, tmp1 + * + * dst.z = (0 < src.x) ? 
tmp1 : 0; + * LT tmp2, 0, src.x + * MOVC dst.z, tmp2, tmp1, 0.0 + */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register src_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register src_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register lowerbound = + make_immediate_reg_float(emit, -128.0f); + struct tgsi_full_src_register upperbound = + make_immediate_reg_float(emit, 128.0f); + + /* tmp1 = clamp(src.w, -128, 128); tmp2 = max(src.y, 0) */ emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, + &lowerbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, + &upperbound, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, + &zero, FALSE); + + /* POW tmp1, tmp2, tmp1 -- expanded below as LOG / MUL / EXP */ + /* LOG tmp2, tmp2 */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, + FALSE); + + /* MUL tmp1, tmp2, tmp1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, + &tmp1_src, FALSE); + + /* EXP tmp1, tmp1 */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, + FALSE); + + /* EQ tmp2, 0, src.w */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, + &src_wwww, FALSE); + /* MOVC tmp1, tmp2, 1.0, tmp1 (tmp1 = (src.w == 0) ? 1.0 : tmp1) */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, + &tmp2_src, &one, &tmp1_src, FALSE); + + /* LT tmp2, 0, 
src.x */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, + &src_xxxx, FALSE); + /* MOVC tmp_move.z, tmp2, tmp1, 0.0 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, + &tmp2_src, &tmp1_src, &zero, FALSE); + } + + /* MOV dst, tmp_move: copy the staged result to the real destination */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LOG instruction. + * + * Results are staged in a temporary register and copied to the + * destination with one final MOV, so the translation stays correct when + * the destination register is also the source register. + * Always returns TRUE. + */ +static boolean +emit_log(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* + * dst.x = floor(lg2(abs(s0.x))) + * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) + * dst.z = lg2(abs(s0.x)) + * dst.w = 1.0 + */ + + struct tgsi_full_src_register src_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); + + /* + * s0.x is read twice: by the LOG at the start and by the DIV for + * dst.y near the end. If dst and src are the same register, writing + * dst.z or dst.x in between would clobber the DIV's input, so we + * create a temporary for the result and insert an extra move. + */ + unsigned tmp_move = get_temp_index(emit); + struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); + struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); + + /* only use X component of temp reg */ + tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* LOG tmp.x, abs(s0.x) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, + &abs_src_xxxx, FALSE); + } + + /* MOV tmp_move.z, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + struct tgsi_full_dst_register dst_z = + writemask_dst(&move_dst, TGSI_WRITEMASK_Z); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, + &tmp_src, inst->Instruction.Saturate); + } + + /* FLR tmp.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, + &tmp_src, FALSE); + } + + /* MOV tmp_move.x, tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + struct tgsi_full_dst_register dst_x = + writemask_dst(&move_dst, TGSI_WRITEMASK_X); + + emit_instruction_op1(emit, 
VGPU10_OPCODE_MOV, &dst_x, &tmp_src, + inst->Instruction.Saturate); + } + + /* EXP tmp.x, tmp.x */ + /* DIV tmp_move.y, abs(s0.x), tmp.x */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + struct tgsi_full_dst_register dst_y = + writemask_dst(&move_dst, TGSI_WRITEMASK_Y); + + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, + FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, + &tmp_src, inst->Instruction.Saturate); + } + + /* MOV tmp_move.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_dst_register dst_w = + writemask_dst(&move_dst, TGSI_WRITEMASK_W); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); + } + + /* MOV dst, tmp_move: copy the staged result to the real destination */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, + FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_LRP instruction. + * Always returns TRUE. + */ +static boolean +emit_lrp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = LRP(s0, s1, s2): + * dst = s0 * (s1 - s2) + s2 + * Translates into: + * ADD tmp, s1, -s2; tmp = s1 - s2 + * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2 + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); + struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); + struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); + + /* ADD tmp, s1, -s2 */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, + &inst->Src[1], &neg_src2, FALSE); + + /* MAD dst, s0, tmp, s2 */ + emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], + &inst->Src[0], &src_tmp, &inst->Src[2], + inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_POW instruction. 
+ */ +static boolean +emit_pow(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* Note that TGSI_OPCODE_POW computes only one value from src0.x and + * src1.x while VGPU10 computes four values. + * + * dst = POW(src0, src1): + * dst.xyzw = src0.x ^ src1.x + * + * Expanded as exp2(log2(src0.x) * src1.x) using one temporary: + * LOG tmp, s0.xxxx + * MUL tmp, tmp, s1.xxxx + * EXP dst, tmp + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_xxxx = + swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + struct tgsi_full_src_register src1_xxxx = + swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + /* LOG tmp, s0.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, + FALSE); + + /* MUL tmp, tmp, s1.xxxx */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, + &src1_xxxx, FALSE); + + /* EXP dst, tmp */ + emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], + &tmp_src, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
+ */ +static boolean +emit_rcp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* DIV tmp.x, 1.0, s0 (only the X channel of tmp is written) */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx (broadcast the scalar result to dst) */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_RSQ instruction. + * Always returns TRUE. + */ +static boolean +emit_rsq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = RSQ(src): + * dst.xyzw = 1 / sqrt(src.x) + * Translates into: + * RSQ tmp.x, src + * MOV dst, tmp.xxxx + */ + + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + + /* RSQ tmp.x, src (only the X channel of tmp is written) */ + emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, + &inst->Src[0], FALSE); + + /* MOV dst, tmp.xxxx */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + /* free tmp */ + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SCS instruction.
+ */ +static boolean +emit_scs(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = cos(src.x) + * dst.y = sin(src.x) + * dst.z = 0.0 + * dst.w = 1.0 + * + * Translates into: + * SINCOS dst.y, dst.x, src (sine destination first, then cosine) + * MOV dst.zw, (0, 0, 0, 1) + * NOTE(review): src is passed with the instruction's own swizzle; + * presumably callers always supply a scalar swizzle here -- confirm. + */ + struct tgsi_full_dst_register dst_x = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); + struct tgsi_full_dst_register dst_y = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); + struct tgsi_full_dst_register dst_zw = + writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); + + struct tgsi_full_src_register zero_one = + make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); + emit_dst_register(emit, &dst_y); + emit_dst_register(emit, &dst_x); + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + /* MOV dst.zw, (0, 0, 0, 1) */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst_zw, &zero_one, inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. + * Produces a float 1.0 / 0.0 result per component; always returns TRUE. + */ +static boolean +emit_seq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SEQ(s0, s1): + * dst = s0 == s1 ? 1.0 : 0.0 (per component) + * Translates into: + * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* EQ tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero (the saturate flag is not applied) */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. + * Produces a float 1.0 / 0.0 result per component; always returns TRUE. + */ +static boolean +emit_sge(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGE(s0, s1): + * dst = s0 >= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero (the saturate flag is not applied) */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. + * There is no GT here: the comparison is emitted as LT with the + * operands swapped. + */ +static boolean +emit_sgt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SGT(s0, s1): + * dst = s0 > s1 ? 
1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s1, s0 (operands swapped to express s0 > s1) */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero (the saturate flag is not applied) */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. + * + * Both map to one VGPU10 SINCOS, which has two destinations: the first + * receives the sine and the second the cosine. The unwanted destination + * is emitted as a null register. The scalar result is computed into + * tmp.x and then broadcast to dst with a MOV. Always returns TRUE. + */ +static boolean +emit_sincos(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + + struct tgsi_full_src_register tmp_src_xxxx = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst_x = + writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); + + if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) + { + emit_dst_register(emit, &tmp_dst_x); /* first destination register (sine) */ + emit_null_dst_register(emit); /* second destination register (cosine, unused) */ + } + else { + emit_null_dst_register(emit); /* sine result is not needed */ + emit_dst_register(emit, &tmp_dst_x); /* cosine */ + } + + emit_src_register(emit, &inst->Src[0]); + end_emit_instruction(emit); + + /* MOV dst, tmp.xxxx */ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], + &tmp_src_xxxx, inst->Instruction.Saturate); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLE 
(Set Less than or Equal) instruction. + */ +static boolean +emit_sle(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLE(s0, s1): + * dst = s0 <= s1 ? 1.0 : 0.0 (per component) + * Translates into: + * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* GE tmp, s1, s0 */ + emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], + &inst->Src[0], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. + */ +static boolean +emit_slt(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SLT(s0, s1): + * dst = s0 < s1 ? 1.0 : 0.0 (per component) + * Translates into: + * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ?
1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* LT tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. + */ +static boolean +emit_sne(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SNE(s0, s1): + * dst = s0 != s1 ? 1.0 : 0.0 (per component) + * Translates into: + * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) + */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + + /* NE tmp, s0, s1 */ + emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], + &inst->Src[1], FALSE); + + /* MOVC dst, tmp, one, zero */ + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, + &one, &zero, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. + */ +static boolean +emit_ssg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 + * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ?
-1.0 : 0.0 + * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 + * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 + * Translates into: + * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) + * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) + * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) + * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) + */ + struct tgsi_full_src_register zero = + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_src_register neg_one = + make_immediate_reg_float(emit, -1.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], + &zero, FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, + &neg_one, &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, + &inst->Src[0], FALSE); + emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, + &one, &tmp2_src, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. + */ +static boolean +emit_issg(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + * Translates into: + * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) + * ILT tmp2, 0, src tmp2 = 0 < src ?
-1 : 0 (per component) + * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) + */ + /* NOTE(review): a float 0.0 immediate is used as the integer zero operand + * of ILT; presumably this relies on 0.0f and 0 having the same bit + * pattern - confirm. + */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + + struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); + + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, + &inst->Src[0], &zero, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, + &zero, &inst->Src[0], FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], + &tmp1_src, &neg_tmp2, FALSE); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_SUB instruction. + */ +static boolean +emit_sub(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst = SUB(s0, s1): + * dst = s0 - s1 + * Translates into: + * ADD dst, s0, neg(s1) + */ + struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); + + /* ADD dst, s0, neg(s1) */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], + &inst->Src[0], &neg_src1, + inst->Instruction.Saturate); + + return TRUE; +} + + +/** + * Emit a comparison instruction. The dest register will get + * 0 or ~0 values depending on the outcome of comparing src0 to src1. + * NEVER and ALWAYS are emitted as a MOV of 0 and -1 respectively. + * LESSEQUAL and GREATER are emitted as GE and LT with the two source + * operands swapped. + */ +static void +emit_comparison(struct svga_shader_emitter_v10 *emit, + SVGA3dCmpFunc func, + const struct tgsi_full_dst_register *dst, + const struct tgsi_full_src_register *src0, + const struct tgsi_full_src_register *src1) +{ + struct tgsi_full_src_register immediate; + VGPU10OpcodeToken0 opcode0; + boolean swapSrc = FALSE; + + /* Sanity checks for svga vs.
gallium enums */ + STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); + STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); + + opcode0.value = 0; + + switch (func) { + case SVGA3D_CMP_NEVER: + immediate = make_immediate_reg_int(emit, 0); + /* MOV dst, {0} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_ALWAYS: + immediate = make_immediate_reg_int(emit, -1); + /* MOV dst, {-1} */ + begin_emit_instruction(emit); + emit_dword(emit, VGPU10_OPCODE_MOV); + emit_dst_register(emit, dst); + emit_src_register(emit, &immediate); + end_emit_instruction(emit); + return; + case SVGA3D_CMP_LESS: + opcode0.opcodeType = VGPU10_OPCODE_LT; + break; + case SVGA3D_CMP_EQUAL: + opcode0.opcodeType = VGPU10_OPCODE_EQ; + break; + case SVGA3D_CMP_LESSEQUAL: + /* src0 <= src1 is emitted as GE src1, src0 */ + opcode0.opcodeType = VGPU10_OPCODE_GE; + swapSrc = TRUE; + break; + case SVGA3D_CMP_GREATER: + /* src0 > src1 is emitted as LT src1, src0 */ + opcode0.opcodeType = VGPU10_OPCODE_LT; + swapSrc = TRUE; + break; + case SVGA3D_CMP_NOTEQUAL: + opcode0.opcodeType = VGPU10_OPCODE_NE; + break; + case SVGA3D_CMP_GREATEREQUAL: + opcode0.opcodeType = VGPU10_OPCODE_GE; + break; + default: + assert(!"Unexpected comparison mode"); + opcode0.opcodeType = VGPU10_OPCODE_EQ; + } + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dst_register(emit, dst); + if (swapSrc) { + emit_src_register(emit, src1); + emit_src_register(emit, src0); + } + else { + emit_src_register(emit, src0); + emit_src_register(emit, src1); + } + end_emit_instruction(emit); +} + + +/** + * Get texel/address offsets for a texture instruction.
+ */ +static void +get_texel_offsets(const struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, int offsets[3]) +{ + if (inst->Texture.NumOffsets == 1) { + /* According to the OpenGL Shading Language spec, the offsets can only + * be fetched from a previously-declared immediate/literal. + */ + const struct tgsi_texture_offset *off = inst->TexOffsets; + const unsigned index = off[0].Index; + const unsigned swizzleX = off[0].SwizzleX; + const unsigned swizzleY = off[0].SwizzleY; + const unsigned swizzleZ = off[0].SwizzleZ; + const union tgsi_immediate_data *imm = emit->immediates[index]; + + assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); + + /* select the x/y/z offsets from the immediate via the offset swizzles */ + offsets[0] = imm[swizzleX].Int; + offsets[1] = imm[swizzleY].Int; + offsets[2] = imm[swizzleZ].Int; + } + else { + /* NumOffsets != 1: report zero offsets */ + offsets[0] = offsets[1] = offsets[2] = 0; + } +} + + +/** + * Set up the coordinate register for texture sampling. + * When we're sampling from a RECT texture we have to scale the + * unnormalized coordinate to a normalized coordinate. + * We do that by multiplying the coordinate by an "extra" constant. + * An alternative would be to use the RESINFO instruction to query the + * texture's size.
+ */ +static struct tgsi_full_src_register +setup_texcoord(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_src_register *coord) +{ + if (emit->key.tex[unit].unnormalized) { + unsigned scale_index = emit->texcoord_scale_index[unit]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); + + /* MUL tmp, coord, const[] */ + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, + coord, &scale_src, FALSE); + /* tmp is not released here; the callers in this file call + * free_temp_indexes() once the whole instruction has been emitted. + */ + return tmp_src; + } + else { + /* use texcoord as-is */ + return *coord; + } +} + + +/** + * For SAMPLE_C instructions, emit the extra src register which indicates + * the reference/comparison value. + * The value is the W component of \p coord for SHADOW2D_ARRAY and SHADOWCUBE + * targets and the Z component for all other shadow targets. + */ +static void +emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, + unsigned target, + const struct tgsi_full_src_register *coord) +{ + struct tgsi_full_src_register coord_src_ref; + unsigned component; + + assert(tgsi_is_shadow_target(target)); + + assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ + if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE) + component = TGSI_SWIZZLE_W; + else + component = TGSI_SWIZZLE_Z; + + coord_src_ref = scalar_src(coord, component); + + emit_src_register(emit, &coord_src_ref); +} + + +/** + * Info for implementing texture swizzles. + * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() + * functions use this to encapsulate the extra steps needed to perform + * a texture swizzle, or shadow/depth comparisons. + * The shadow/depth comparison is only done here for the cases where + * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
+ */ +struct tex_swizzle_info +{ + boolean swizzled; /**< key swizzle for the unit is not XYZW */ + boolean shadow_compare; /**< do the compare in end_tex_swizzle()? */ + unsigned unit; /**< only set if swizzled or shadow_compare */ + unsigned texture_target; /**< TGSI_TEXTURE_x */ + struct tgsi_full_src_register tmp_src; + struct tgsi_full_dst_register tmp_dst; + const struct tgsi_full_dst_register *inst_dst; /**< instruction's Dst[0] */ + const struct tgsi_full_src_register *coord_src; /**< instruction's Src[0] */ +}; + + +/** + * Do setup for handling texture swizzles or shadow compares. + * \param unit the texture unit + * \param inst the TGSI texture instruction + * \param shadow_compare do shadow/depth comparison? + * \param swz returns the swizzle info + */ +static void +begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, + unsigned unit, + const struct tgsi_full_instruction *inst, + boolean shadow_compare, + struct tex_swizzle_info *swz) +{ + swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); + + swz->shadow_compare = shadow_compare; + swz->texture_target = inst->Texture.Texture; + + if (swz->swizzled || shadow_compare) { + /* Allocate temp register for the result of the SAMPLE instruction + * and the source of the MOV/compare/swizzle instructions. + */ + unsigned tmp = get_temp_index(emit); + swz->tmp_src = make_src_temp_reg(tmp); + swz->tmp_dst = make_dst_temp_reg(tmp); + + swz->unit = unit; + } + swz->inst_dst = &inst->Dst[0]; + swz->coord_src = &inst->Src[0]; +} + + +/** + * Returns the register to put the SAMPLE instruction results into. + * This will either be the original instruction dst reg (if no swizzle + * and no shadow comparison) or a temporary reg if there is a swizzle + * or a shadow comparison. + */ +static const struct tgsi_full_dst_register * +get_tex_swizzle_dst(const struct tex_swizzle_info *swz) +{ + return (swz->swizzled || swz->shadow_compare) + ?
&swz->tmp_dst : swz->inst_dst; +} + + +/** + * This emits the MOV instruction that actually implements a texture swizzle + * and/or shadow comparison. + */ +static void +end_tex_swizzle(struct svga_shader_emitter_v10 *emit, + const struct tex_swizzle_info *swz) +{ + if (swz->shadow_compare) { + /* Emit extra instructions to compare the fetched texel value against + * a texture coordinate component. The result of the comparison + * is 0.0 or 1.0. + */ + struct tgsi_full_src_register coord_src; + struct tgsi_full_src_register texel_src = + scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + /* convert gallium comparison func to SVGA comparison func */ + SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + switch (swz->texture_target) { + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + break; + case TGSI_TEXTURE_SHADOW1D: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); + break; + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); + break; + default: + assert(!"Unexpected texture target in end_tex_swizzle()"); + coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); + } + + /* COMPARE tmp, coord, texel */ + /* XXX it would seem that the texel and coord arguments should + * be transposed here, but piglit tests indicate otherwise.
+ */ + emit_comparison(emit, compare_func, + &swz->tmp_dst, &texel_src, &coord_src); + + /* AND dest, tmp, {1.0} */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); + if (swz->swizzled) { + emit_dst_register(emit, &swz->tmp_dst); + } + else { + emit_dst_register(emit, swz->inst_dst); + } + emit_src_register(emit, &swz->tmp_src); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + if (swz->swizzled) { + unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; + unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; + unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; + unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; + unsigned writemask_0 = 0, writemask_1 = 0; + boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type); + + /* Swizzle w/out zero/one terms */ + struct tgsi_full_src_register src_swizzled = + swizzle_src(&swz->tmp_src, + swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED, + swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN, + swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE, + swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA); + + /* MOV dst, color(tmp). */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + swz->inst_dst, &src_swizzled, FALSE); + + /* handle swizzle zero terms */ + writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) | + ((swz_g == PIPE_SWIZZLE_ZERO) << 1) | + ((swz_b == PIPE_SWIZZLE_ZERO) << 2) | + ((swz_a == PIPE_SWIZZLE_ZERO) << 3)); + + if (writemask_0) { + struct tgsi_full_src_register zero = int_tex ?
+ make_immediate_reg_int(emit, 0) : + make_immediate_reg_float(emit, 0.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_0); + + /* MOV dst.writemask_0, {0,0,0,0} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &dst, &zero, FALSE); + } + + /* handle swizzle one terms */ + writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) | + ((swz_g == PIPE_SWIZZLE_ONE) << 1) | + ((swz_b == PIPE_SWIZZLE_ONE) << 2) | + ((swz_a == PIPE_SWIZZLE_ONE) << 3)); + + if (writemask_1) { + struct tgsi_full_src_register one = int_tex ? + make_immediate_reg_int(emit, 1) : + make_immediate_reg_float(emit, 1.0f); + struct tgsi_full_dst_register dst = + writemask_dst(swz->inst_dst, writemask_1); + + /* MOV dst.writemask_1, {1,1,1,1} */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); + } + } +} + + +/** + * Emit code for TGSI_OPCODE_SAMPLE instruction. + * The resource unit comes from Src[1] and the sampler unit from Src[2]. + * NOTE(review): the swizzle setup is keyed by the sampler unit while the + * texcoord scaling is keyed by the resource unit - confirm this is intended. + */ +static boolean +emit_sample(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned resource_unit = inst->Src[1].Register.Index; + const unsigned sampler_unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, resource_unit); + emit_sampler_register(emit, sampler_unit); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Check if a texture instruction is valid.
+ * An example of an invalid texture instruction is doing shadow comparison + * with an integer-valued texture. + * If we detect an invalid texture instruction, we replace it with: + * MOV dst, {1,1,1,1}; + * \return TRUE if valid, FALSE if invalid. + */ +static boolean +is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const unsigned unit = inst->Src[1].Register.Index; + const unsigned target = inst->Texture.Texture; + boolean valid = TRUE; + + if (tgsi_is_shadow_target(target) && + is_integer_type(emit->key.tex[unit].return_type)) { + debug_printf("Invalid SAMPLE_C with an integer texture!\n"); + valid = FALSE; + } + /* XXX might check for other conditions in the future here */ + + if (!valid) { + /* emit a MOV dst, {1,1,1,1} instruction. */ + struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &one); + end_emit_instruction(emit); + } + + return valid; +} + + +/** + * Emit code for TGSI_OPCODE_TEX (simple texture lookup) + * Shadow targets are sampled with SAMPLE_C, everything else with SAMPLE. + */ +static boolean +emit_tex(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + struct tgsi_full_src_register coord; + int offsets[3]; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE dst, coord(s0), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode,
inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &coord); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXP (projective texture) + * The coordinate is divided by the W component of Src[0] before sampling. + */ +static boolean +emit_txp(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + unsigned target = inst->Texture.Texture; + unsigned opcode; + int offsets[3]; + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register src0_wwww = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) { + /* tmp was already allocated above; release it before bailing out so + * the temp does not stay reserved for the following instructions. + */ + free_temp_indexes(emit); + return TRUE; + } + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* DIV tmp, coord, coord.wwww */ + emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, + &coord, &src0_wwww, FALSE); + + /* SAMPLE dst, coord(tmp), resource, sampler */ + begin_emit_instruction(emit); + + if (tgsi_is_shadow_target(target)) + opcode = VGPU10_OPCODE_SAMPLE_C; + else + opcode = VGPU10_OPCODE_SAMPLE; + + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &tmp_src); /* projected coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit,
target, &tmp_src); + } + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_XPD instruction. + */ +static boolean +emit_xpd(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + /* dst.x = src0.y * src1.z - src1.y * src0.z + * dst.y = src0.z * src1.x - src1.z * src0.x + * dst.z = src0.x * src1.y - src1.x * src0.y + * dst.w = 1 + */ + struct tgsi_full_src_register s0_xxxx = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); + struct tgsi_full_src_register s0_yyyy = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s0_zzzz = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); + + struct tgsi_full_src_register s1_xxxx = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + struct tgsi_full_src_register s1_yyyy = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); + struct tgsi_full_src_register s1_zzzz = + scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z); + + unsigned tmp1 = get_temp_index(emit); + struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); + struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); + + unsigned tmp2 = get_temp_index(emit); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); + struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src); + + unsigned tmp3 = get_temp_index(emit); + struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3); + struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3); + struct tgsi_full_dst_register tmp3_dst_x = + writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X); + struct tgsi_full_dst_register tmp3_dst_y = + writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y); + struct tgsi_full_dst_register tmp3_dst_z = + writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z); + struct tgsi_full_dst_register tmp3_dst_w = + writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W); + + /* Note: we put all the
intermediate computations into tmp3 in case + * the XPD dest register is the same as one of the src regs (in which + * case we could clobber a src reg before we're done with it). + * + * Note: we could get by with just one temp register instead of three + * since we're doing scalar operations and there's enough room in one + * temp for everything. + */ + + /* MUL tmp1, src0.y, src1.z */ + /* MUL tmp2, src1.y, src0.z */ + /* ADD tmp3.x, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, + &s0_yyyy, &s1_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, + &s1_yyyy, &s0_zzzz, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.z, src1.x */ + /* MUL tmp2, src1.z, src0.x */ + /* ADD tmp3.y, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, + &s1_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, + &s0_xxxx, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MUL tmp1, src0.x, src1.y */ + /* MUL tmp2, src1.x, src0.y */ + /* ADD tmp3.z, tmp1, -tmp2 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, + &s1_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, + &s0_yyyy, FALSE); + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, + &tmp1_src, &neg_tmp2_src, FALSE); + } + + /* MOV tmp3.w, 1.0 */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + struct tgsi_full_src_register one = + make_immediate_reg_float(emit, 1.0f); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE); + } + + /* MOV dst, tmp3 */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
&inst->Dst[0], &tmp3_src, + inst->Instruction.Saturate); + + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXD (explicit derivatives) + * For shadow targets the depth comparison is requested from + * begin_tex_swizzle() and emitted by end_tex_swizzle(). + */ +static boolean +emit_txd(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + unsigned target = inst->Texture.Texture; + int offsets[3]; + struct tgsi_full_src_register coord; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &inst->Src[1]); /* Xderiv */ + emit_src_register(emit, &inst->Src[2]); /* Yderiv */ + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXF (texel fetch) + * LD_MS is used when the shader key marks the texture as MSAA (the sample + * index is taken from Src[0].w), LD otherwise. + */ +static boolean +emit_txf(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + const unsigned msaa = emit->key.tex[unit].texture_msaa; + int offsets[3]; + struct tex_swizzle_info swz_info; + + begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + + get_texel_offsets(emit, inst, offsets); + + if (msaa) { + /* Fetch one sample from an MSAA texture */ + struct tgsi_full_src_register sampleIndex = + scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + /* LD_MS dst, coord(s0), resource, sampleIndex */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + emit_src_register(emit, &sampleIndex); + end_emit_instruction(emit); + } + else { + /* Fetch one texel specified by integer coordinate */ + /* LD dst, coord(s0), resource */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_LD, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) + * or TGSI_OPCODE_TXB2 (for cube shadow maps). + * For TXB2 the bias is Src[1].x and the unit is Src[2]; for TXL/TXB the + * lod/bias is Src[0].w and the unit is Src[1]. + */ +static boolean +emit_txl_txb(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned target = inst->Texture.Texture; + unsigned opcode, unit; + int offsets[3]; + struct tgsi_full_src_register coord, lod_bias; + struct tex_swizzle_info swz_info; + + assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { + lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); + unit = inst->Src[2].Register.Index; + } + else { + lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); + unit = inst->Src[1].Register.Index; + } + + begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), + &swz_info); + + get_texel_offsets(emit, inst, offsets); + + coord = setup_texcoord(emit, unit, &inst->Src[0]); + + /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod/bias(s0.w or s1.x) */ + begin_emit_instruction(emit); + if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { + opcode = VGPU10_OPCODE_SAMPLE_L; + } + else { + opcode = VGPU10_OPCODE_SAMPLE_B; + } +
emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); + emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); + emit_src_register(emit, &coord); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + emit_src_register(emit, &lod_bias); + end_emit_instruction(emit); + + end_tex_swizzle(emit, &swz_info); + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. + */ +static boolean +emit_txq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + + if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { + /* RESINFO does not support querying texture buffers, so we instead + * store texture buffer sizes in shader constants, then copy them to + * implement TXQ instead of emitting RESINFO. + * MOV dst, const[texture_buffer_size_index[unit]] + */ + struct tgsi_full_src_register size_src = + make_src_const_reg(emit->texture_buffer_size_index[unit]); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, + FALSE); + } else { + /* RESINFO dst, srcMipLevel, resource */ + begin_emit_instruction(emit); + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[0]); + emit_resource_register(emit, unit); + end_emit_instruction(emit); + } + + free_temp_indexes(emit); + + return TRUE; +} + + +/** + * Emit a simple instruction (like ADD, MUL, MIN, etc).
+ */
+static boolean
+emit_simple(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   const unsigned tgsi_op = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(tgsi_op);
+   unsigned reg;
+
+   /* Opcode token first, then the operands in TGSI order:
+    * all destinations followed by all sources.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(tgsi_op), inst->Instruction.Saturate);
+
+   for (reg = 0; reg < info->num_dst; reg++) {
+      emit_dst_register(emit, &inst->Dst[reg]);
+   }
+
+   for (reg = 0; reg < info->num_src; reg++) {
+      emit_src_register(emit, &inst->Src[reg]);
+   }
+
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit a simple VGPU10 instruction that has several destination slots
+ * while the TGSI instruction only provides one destination register.
+ *
+ * \param dst_count  number of destination slots the VGPU10 opcode takes
+ * \param dst_index  which slot receives inst->Dst[0]; every other slot
+ *                   gets a null destination register
+ */
+static boolean
+emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
+                 const struct tgsi_full_instruction *inst,
+                 unsigned dst_count,
+                 unsigned dst_index)
+{
+   const unsigned tgsi_op = inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(tgsi_op);
+   unsigned slot, src;
+
+   begin_emit_instruction(emit);
+   emit_opcode(emit, translate_opcode(tgsi_op), inst->Instruction.Saturate);
+
+   /* Destination slots: the real register in one, null in the rest */
+   for (slot = 0; slot < dst_count; slot++) {
+      if (slot != dst_index) {
+         emit_null_dst_register(emit);
+         continue;
+      }
+      emit_dst_register(emit, &inst->Dst[0]);
+   }
+
+   /* Sources are passed through unchanged */
+   for (src = 0; src < info->num_src; src++) {
+      emit_src_register(emit, &inst->Src[src]);
+   }
+
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Translate a single TGSI instruction to VGPU10.
+ */
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+                        unsigned inst_number,
+                        const struct tgsi_full_instruction *inst)
+{
+   /* Dispatch on the TGSI opcode.  Every case returns the result of its
+    * helper; opcodes with no case here print a debug message and return
+    * FALSE.  Note: inst_number is not referenced in this function body.
+    */
+   const unsigned opcode = inst->Instruction.Opcode;
+
+   switch (opcode) {
+   case TGSI_OPCODE_ADD:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_BRK:
+   case TGSI_OPCODE_CEIL:
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_DDX:
+   case TGSI_OPCODE_DDY:
+   case TGSI_OPCODE_DIV:
+   case TGSI_OPCODE_DP2:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+   case TGSI_OPCODE_F2I:
+   case TGSI_OPCODE_F2U:
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_FRC:
+   case TGSI_OPCODE_FSEQ:
+   case TGSI_OPCODE_FSGE:
+   case TGSI_OPCODE_FSLT:
+   case TGSI_OPCODE_FSNE:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_MAD:
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_MOV:
+   case TGSI_OPCODE_MUL:
+   case TGSI_OPCODE_NOP:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_RET:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_SQRT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_TRUNC:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UCMP:
+   case TGSI_OPCODE_USHR:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_XOR:
+      /* simple instructions: opcode and operands are emitted as-is */
+      return emit_simple(emit, inst);
+
+
+   case TGSI_OPCODE_EMIT:
+      return emit_vertex(emit, inst);
+   case TGSI_OPCODE_ENDPRIM:
+      return emit_endprim(emit, inst);
+   case TGSI_OPCODE_ABS:
+      return emit_abs(emit, inst);
+   case TGSI_OPCODE_IABS:
+      return emit_iabs(emit, inst);
+   case TGSI_OPCODE_ARL:
+      /* fall-through */
+   case TGSI_OPCODE_UARL:
+      return emit_arl_uarl(emit, inst);
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      return TRUE;
+   case TGSI_OPCODE_CAL:
+      return emit_cal(emit, inst);
+   case TGSI_OPCODE_CMP:
+      return emit_cmp(emit, inst);
+   case TGSI_OPCODE_COS:
+      /* COS and SIN share one helper */
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_DP2A:
+      return emit_dp2a(emit, inst);
+   case TGSI_OPCODE_DPH:
+      return emit_dph(emit, inst);
+   case TGSI_OPCODE_DST:
+      return emit_dst(emit, inst);
+   case TGSI_OPCODE_EX2:
+      return emit_ex2(emit, inst);
+   case TGSI_OPCODE_EXP:
+      return emit_exp(emit, inst);
+   case TGSI_OPCODE_IF:
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_KILL:
+      return emit_kill(emit, inst);
+   case TGSI_OPCODE_KILL_IF:
+      return emit_kill_if(emit, inst);
+   case TGSI_OPCODE_LG2:
+      return emit_lg2(emit, inst);
+   case TGSI_OPCODE_LIT:
+      return emit_lit(emit, inst);
+   case TGSI_OPCODE_LOG:
+      return emit_log(emit, inst);
+   case TGSI_OPCODE_LRP:
+      return emit_lrp(emit, inst);
+   case TGSI_OPCODE_POW:
+      return emit_pow(emit, inst);
+   case TGSI_OPCODE_RCP:
+      return emit_rcp(emit, inst);
+   case TGSI_OPCODE_RSQ:
+      return emit_rsq(emit, inst);
+   case TGSI_OPCODE_SAMPLE:
+      return emit_sample(emit, inst);
+   case TGSI_OPCODE_SCS:
+      return emit_scs(emit, inst);
+   case TGSI_OPCODE_SEQ:
+      return emit_seq(emit, inst);
+   case TGSI_OPCODE_SGE:
+      return emit_sge(emit, inst);
+   case TGSI_OPCODE_SGT:
+      return emit_sgt(emit, inst);
+   case TGSI_OPCODE_SIN:
+      return emit_sincos(emit, inst);
+   case TGSI_OPCODE_SLE:
+      return emit_sle(emit, inst);
+   case TGSI_OPCODE_SLT:
+      return emit_slt(emit, inst);
+   case TGSI_OPCODE_SNE:
+      return emit_sne(emit, inst);
+   case TGSI_OPCODE_SSG:
+      return emit_ssg(emit, inst);
+   case TGSI_OPCODE_ISSG:
+      return emit_issg(emit, inst);
+   case TGSI_OPCODE_SUB:
+      return emit_sub(emit, inst);
+   case TGSI_OPCODE_TEX:
+      return emit_tex(emit, inst);
+   case TGSI_OPCODE_TXP:
+      return emit_txp(emit, inst);
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXB2:
+   case TGSI_OPCODE_TXL:
+      return emit_txl_txb(emit, inst);
+   case TGSI_OPCODE_TXD:
+      return emit_txd(emit, inst);
+   case TGSI_OPCODE_TXF:
+      return emit_txf(emit, inst);
+   case TGSI_OPCODE_TXQ:
+      return emit_txq(emit, inst);
+   case TGSI_OPCODE_UIF:
+      /* UIF is handled by the same helper as IF */
+      return emit_if(emit, inst);
+   case TGSI_OPCODE_XPD:
+      return emit_xpd(emit, inst);
+   case TGSI_OPCODE_UMUL_HI:
+   case TGSI_OPCODE_IMUL_HI:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_IDIV:
+      /* These cases use only the FIRST of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 0);
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_MOD:
+      /* These cases use only the SECOND of two destination registers */
+      return emit_simple_1dst(emit, inst, 2, 1);
+   case TGSI_OPCODE_END:
+      /* The post-helper code must be emitted before the final END */
+      if (!emit_post_helpers(emit))
+         return FALSE;
+      return emit_simple(emit, inst);
+
+   default:
+      debug_printf("Unimplemented tgsi instruction %s\n",
+                   tgsi_get_opcode_name(opcode));
+      return FALSE;
+   }
+
+   /* Not reached: every switch case above returns */
+   return TRUE;
+}
+
+
+/**
+ * Emit the extra instructions to adjust the vertex position.
+ * There are two possible adjustments:
+ * 1. Converting from Gallium to VGPU10 coordinate space by applying the
+ *    "prescale" and "pretranslate" values.
+ * 2. Undoing the viewport transformation when we use the swtnl/draw path.
+ * If neither applies but a temporary position register is in use, the
+ * temporary is simply copied to the position output.
+ * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
+ */
+static void
+emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
+                       unsigned vs_pos_tmp_index)
+{
+   struct tgsi_full_src_register tmp_pos_src;
+   struct tgsi_full_dst_register pos_dst;
+
+   /* Don't bother to emit any extra vertex instructions if vertex position is
+    * not written out
+    */
+   if (emit->vposition.out_index == INVALID_INDEX)
+      return;
+
+   tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
+   pos_dst = make_dst_output_reg(emit->vposition.out_index);
+
+   /* If non-adjusted vertex position register index
+    * is valid, copy the vertex position from the temporary
+    * vertex position register before it is modified by the
+    * prescale computation.
+    */
+   if (emit->vposition.so_index != INVALID_INDEX) {
+      struct tgsi_full_dst_register pos_so_dst =
+         make_dst_output_reg(emit->vposition.so_index);
+
+      /* MOV pos_so, tmp_pos */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
+                           &tmp_pos_src, FALSE);
+   }
+
+   if (emit->vposition.need_prescale) {
+      /* This code adjusts the vertex position to match the VGPU10 convention.
+       * If p is the position computed by the shader (usually by applying the
+       * modelview and projection matrices), the new position q is computed by:
+       *
+       * q.x = p.w * trans.x + p.x * scale.x
+       * q.y = p.w * trans.y + p.y * scale.y
+       * q.z = p.w * trans.z + p.z * scale.z;
+       * q.w = p.w * trans.w + p.w;
+       */
+      struct tgsi_full_src_register tmp_pos_src_w =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xyz =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
+
+      struct tgsi_full_src_register prescale_scale =
+         make_src_const_reg(emit->vposition.prescale_scale_index);
+      struct tgsi_full_src_register prescale_trans =
+         make_src_const_reg(emit->vposition.prescale_trans_index);
+
+      /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
+                           &tmp_pos_src, &prescale_scale, FALSE);
+
+      /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
+      emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
+                           &prescale_trans, &tmp_pos_src, FALSE);
+   }
+   else if (emit->key.vs.undo_viewport) {
+      /* This code computes the final vertex position from the temporary
+       * vertex position by undoing the viewport transformation and the
+       * divide-by-W operation (we convert window coords back to clip coords).
+       * This is needed when we use the 'draw' module for fallbacks.
+       * If p is the temp pos in window coords, then the NDC coord q is:
+       *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
+       *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
+       *   q.z = p.z * p.w
+       *   q.w = p.w
+       * CONST[vs_viewport_index] contains:
+       *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
+       */
+      struct tgsi_full_dst_register tmp_pos_dst =
+         make_dst_temp_reg(vs_pos_tmp_index);
+      struct tgsi_full_dst_register tmp_pos_dst_xy =
+         writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
+      struct tgsi_full_src_register tmp_pos_src_wwww =
+         scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
+
+      struct tgsi_full_dst_register pos_dst_xyz =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
+      struct tgsi_full_dst_register pos_dst_w =
+         writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
+
+      struct tgsi_full_src_register vp_xyzw =
+         make_src_const_reg(emit->vs.viewport_index);
+      struct tgsi_full_src_register vp_zwww =
+         swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                     TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+
+      /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
+      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_zwww, FALSE);
+
+      /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzw */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
+                           &tmp_pos_src, &vp_xyzw, FALSE);
+
+      /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
+      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
+                           &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
+
+      /* MOV pos.w, tmp_pos.w */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
+                           &tmp_pos_src, FALSE);
+   }
+   else if (vs_pos_tmp_index != INVALID_INDEX) {
+      /* This code is to handle the case where the temporary vertex
+       * position register is created when the vertex shader has stream
+       * output and prescale is disabled because rasterization is to be
+       * discarded.
+       */
+      /* NOTE: this local shadows the function-scope pos_dst; both are
+       * built from emit->vposition.out_index.
+       */
+      struct tgsi_full_dst_register pos_dst =
+         make_dst_output_reg(emit->vposition.out_index);
+
+      /* MOV pos, tmp_pos */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &pos_dst);
+      emit_src_register(emit, &tmp_pos_src);
+      end_emit_instruction(emit);
+   }
+}
+
+/**
+ * Emit the clipping-related instructions selected by emit->clip_mode.
+ * CLIP_DISTANCE and CLIP_VERTEX are handled unconditionally; CLIP_LEGACY
+ * is only handled when the shader has a vertex position output.
+ */
+static void
+emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   if (emit->clip_mode == CLIP_DISTANCE) {
+      /* Copy from clip distance temporary to CLIPDIST & the shadow copy */
+      emit_clip_distance_instructions(emit);
+
+   } else if (emit->clip_mode == CLIP_VERTEX) {
+      /* Convert TGSI CLIPVERTEX to CLIPDIST */
+      emit_clip_vertex_instructions(emit);
+   }
+
+   /**
+    * Emit vertex position and take care of legacy user planes only if
+    * there is a valid vertex position register index.
+    * This is to take care of the case
+    * where the shader doesn't output vertex position. Then in
+    * this case, don't bother to emit more vertex instructions.
+    */
+   if (emit->vposition.out_index == INVALID_INDEX)
+      return;
+
+   /**
+    * Emit per-vertex clipping instructions for legacy user defined clip planes.
+    * NOTE: we must emit the clip distance instructions before the
+    * emit_vpos_instructions() call since the latter function will change
+    * the TEMP[vs_pos_tmp_index] value.
+    */
+   if (emit->clip_mode == CLIP_LEGACY) {
+      /* Emit CLIPDIST for legacy user defined clip planes */
+      emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
+   }
+}
+
+
+/**
+ * Emit extra per-vertex instructions.  This includes clip-coordinate
+ * space conversion and computing clip distances.  This is called for
+ * each GS emit-vertex instruction and at the end of VS translation.
+ */
+static void
+emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   /* Remember the temp position register; it is hidden while the
+    * position fix-up code is emitted and put back afterwards.
+    */
+   const unsigned saved_pos_tmp = emit->vposition.tmp_index;
+
+   /* Clip code first: it reads the still-unmodified temp position */
+   emit_clipping_instructions(emit);
+
+   /* With tmp_index invalid, emit_dst_register() targets the real
+    * vertex position output instead of the temporary.
+    */
+   emit->vposition.tmp_index = INVALID_INDEX;
+   emit_vpos_instructions(emit, saved_pos_tmp);
+
+   /* A GS may emit more vertices, so restore the index (harmless for VS) */
+   emit->vposition.tmp_index = saved_pos_tmp;
+}
+
+/**
+ * Translate the TGSI_OPCODE_EMIT GS instruction.
+ * The per-vertex helper code is emitted first, then a bare VGPU10 EMIT.
+ */
+static boolean
+emit_vertex(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   assert(emit->unit == PIPE_SHADER_GEOMETRY);
+
+   emit_vertex_instructions(emit);
+
+   /* emit_simple() is not usable here: the TGSI instruction carries one
+    * operand (the vertex stream number) that VGPU10 EMIT does not take.
+    */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's boolean front-face
+ * register to TGSI's signed front-face register.
+ *
+ * TODO: Make temporary front-face register a scalar.
+ */
+static void
+emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.face_input_index != INVALID_INDEX) {
+      /* convert vgpu10 boolean face register to gallium +/-1 value */
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.face_tmp_index);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      struct tgsi_full_src_register neg_one =
+         make_immediate_reg_float(emit, -1.0f);
+
+      /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_face_register(emit);
+      emit_src_register(emit, &one);
+      emit_src_register(emit, &neg_one);
+      end_emit_instruction(emit);
+   }
+}
+
+
+/**
+ * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
+ * The xyz components are copied unchanged into the fragcoord temporary.
+ */
+static void
+emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
+      struct tgsi_full_dst_register tmp_dst_xyz =
+         writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
+      struct tgsi_full_dst_register tmp_dst_w =
+         writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+      struct tgsi_full_src_register fragcoord =
+         make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
+
+      /* save the input index */
+      const unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
+      /* set to invalid to prevent substitution in emit_src_register() */
+      emit->fs.fragcoord_input_index = INVALID_INDEX;
+
+      /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
+      emit_dst_register(emit, &tmp_dst_xyz);
+      emit_src_register(emit, &fragcoord);
+      end_emit_instruction(emit);
+
+      /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
+      emit_dst_register(emit, &tmp_dst_w);
+      emit_src_register(emit, &one);
+      emit_src_register(emit, &fragcoord);
+      end_emit_instruction(emit);
+
+      /* restore saved value */
+      emit->fs.fragcoord_input_index = fragcoord_input_index;
+   }
+}
+
+
+/**
+ * Emit extra instructions to adjust VS inputs/attributes.  This can
+ * mean casting a vertex attribute from int to float or setting the
+ * W component to 1, or both.
+ *
+ * Each attribute with a bit set in any of the adjustment masks is
+ * converted into the temporary register emit->vs.adjusted_input[index].
+ */
+static void
+emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
+   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
+   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
+   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
+   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
+   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
+   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
+
+   unsigned adjust_mask = (save_w_1_mask |
+                           save_itof_mask |
+                           save_utof_mask |
+                           save_is_bgra_mask |
+                           save_puint_to_snorm_mask |
+                           save_puint_to_uscaled_mask |
+                           save_puint_to_sscaled_mask);
+
+   assert(emit->unit == PIPE_SHADER_VERTEX);
+
+   if (adjust_mask) {
+      struct tgsi_full_src_register one =
+         make_immediate_reg_float(emit, 1.0f);
+
+      struct tgsi_full_src_register one_int =
+         make_immediate_reg_int(emit, 1);
+
+      /* We need to turn off these bitmasks while emitting the
+       * instructions below, then restore them afterward.
+       */
+      emit->key.vs.adjust_attrib_w_1 = 0;
+      emit->key.vs.adjust_attrib_itof = 0;
+      emit->key.vs.adjust_attrib_utof = 0;
+      emit->key.vs.attrib_is_bgra = 0;
+      emit->key.vs.attrib_puint_to_snorm = 0;
+      emit->key.vs.attrib_puint_to_uscaled = 0;
+      emit->key.vs.attrib_puint_to_sscaled = 0;
+
+      while (adjust_mask) {
+         unsigned index = u_bit_scan(&adjust_mask);
+         /* Unsigned one-bit mask for this attribute.  Using 1u keeps the
+          * shift well defined when index is 31 (1 << 31 overflows int).
+          */
+         const unsigned bit = 1u << index;
+         unsigned tmp = emit->vs.adjusted_input[index];
+         struct tgsi_full_src_register input_src =
+            make_src_reg(TGSI_FILE_INPUT, index);
+
+         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+         struct tgsi_full_dst_register tmp_dst_w =
+            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
+
+         /* ITOF/UTOF/MOV tmp, input[index] */
+         if (save_itof_mask & bit) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_utof_mask & bit) {
+            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+         else if (save_puint_to_snorm_mask & bit) {
+            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_uscaled_mask & bit) {
+            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
+         }
+         else if (save_puint_to_sscaled_mask & bit) {
+            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
+         }
+         else {
+            /* Only the W=1 and/or BGRA adjustments remain: plain copy */
+            assert((save_w_1_mask | save_is_bgra_mask) & bit);
+            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                 &tmp_dst, &input_src, FALSE);
+         }
+
+         if (save_is_bgra_mask & bit) {
+            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
+         }
+
+         if (save_w_1_mask & bit) {
+            /* MOV tmp.w, 1.0 (integer 1 for pure-integer attributes) */
+            if (emit->key.vs.attrib_is_pure_int & bit) {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one_int, FALSE);
+            }
+            else {
+               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                                    &tmp_dst_w, &one, FALSE);
+            }
+         }
+      }
+
+      /* Put the key's adjustment masks back */
+      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
+      emit->key.vs.adjust_attrib_itof = save_itof_mask;
+      emit->key.vs.adjust_attrib_utof = save_utof_mask;
+      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
+      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
+      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
+      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
+   }
+}
+
+
+/**
+ * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
+ * to implement some instructions.  We pre-allocate those values here
+ * in the immediate constant buffer.
+ *
+ * The position of each allocated immediate is recorded in
+ * emit->common_immediate_pos[] and the count in
+ * emit->num_common_immediates.
+ */
+static void
+alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+{
+   unsigned n = 0;
+
+   /* Always-present float and integer constants */
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+
+   emit->common_immediate_pos[n++] =
+      alloc_immediate_int4(emit, 0, 1, 0, -1);
+
+   /* Extra constants, allocated only when the matching packed-uint
+    * attribute conversion is enabled in the key.
+    */
+   if (emit->key.vs.attrib_puint_to_snorm) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+   }
+
+   if (emit->key.vs.attrib_puint_to_uscaled) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+   }
+
+   if (emit->key.vs.attrib_puint_to_sscaled) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_int4(emit, 22, 12, 2, 0);
+
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_int4(emit, 22, 30, 0, 0);
+   }
+
+   assert(n <= Elements(emit->common_immediate_pos));
+   emit->num_common_immediates = n;
+}
+
+
+/**
+ * Emit any extra/helper declarations/code that we might need between
+ * the declaration section and code section.
+ */
+static boolean
+emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   const unsigned unit = emit->unit;
+
+   /* Geometry shader properties come before any declaration */
+   if (unit == PIPE_SHADER_GEOMETRY) {
+      emit_property_instructions(emit);
+   }
+
+   /* Inputs, then outputs; either one failing aborts the translation */
+   if (!emit_input_declarations(emit) ||
+       !emit_output_declarations(emit)) {
+      return FALSE;
+   }
+
+   /* Temporaries, constants, samplers and resources, in that order */
+   emit_temporaries_declaration(emit);
+   emit_constant_declaration(emit);
+   emit_sampler_declarations(emit);
+   emit_resource_declarations(emit);
+
+   /* Clip distance outputs only exist for the VS and GS stages */
+   switch (unit) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_GEOMETRY:
+      emit_clip_distance_declarations(emit);
+      break;
+   default:
+      break;
+   }
+
+   alloc_common_immediates(emit);
+
+   /* The alpha reference value is replicated into all four channels */
+   if (unit == PIPE_SHADER_FRAGMENT &&
+       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+      const float ref = emit->key.fs.alpha_ref;
+
+      emit->fs.alpha_ref_index =
+         alloc_immediate_float4(emit, ref, ref, ref, ref);
+   }
+
+   /* All immediates (the shader's own plus the ones allocated above)
+    * are now known, so the constant block can be written out.
+    */
+   emit_vgpu10_immediates_block(emit);
+
+   /* Stage-specific prologue code */
+   switch (unit) {
+   case PIPE_SHADER_FRAGMENT:
+      emit_frontface_instructions(emit);
+      emit_fragcoord_instructions(emit);
+      break;
+   case PIPE_SHADER_VERTEX:
+      emit_vertex_attrib_instructions(emit);
+      break;
+   default:
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
+ * against the alpha reference value and discards the fragment if the
+ * comparison fails.
+ */
+static void
+emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
+                             unsigned fs_color_tmp_index)
+{
+   /* Scratch temporary that receives the comparison result */
+   const unsigned cmp_tmp = get_temp_index(emit);
+   struct tgsi_full_src_register cmp_src = make_src_temp_reg(cmp_tmp);
+   struct tgsi_full_src_register cmp_src_x =
+      scalar_src(&cmp_src, TGSI_SWIZZLE_X);
+   struct tgsi_full_dst_register cmp_dst = make_dst_temp_reg(cmp_tmp);
+
+   /* The shader's color lives in a temp; only its alpha is tested */
+   struct tgsi_full_src_register color =
+      make_src_temp_reg(fs_color_tmp_index);
+   struct tgsi_full_src_register alpha =
+      scalar_src(&color, TGSI_SWIZZLE_W);
+
+   struct tgsi_full_src_register alpha_ref =
+      make_src_immediate_reg(emit->fs.alpha_ref_index);
+   struct tgsi_full_dst_register color_out =
+      make_dst_output_reg(emit->fs.color_out_index[0]);
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   /* cmp = alpha 'alpha_func' alpha_ref */
+   emit_comparison(emit, emit->key.fs.alpha_func, &cmp_dst,
+                   &alpha, &alpha_ref);
+
+   /* DISCARD when cmp.x is zero, i.e. the test failed */
+   begin_emit_instruction(emit);
+   emit_discard_opcode(emit, FALSE);
+   emit_src_register(emit, &cmp_src_x);
+   end_emit_instruction(emit);
+
+   /* When the color is not broadcast to several buffers afterwards,
+    * the final color write happens here:
+    *   MOV output.color, tempcolor
+    */
+   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_out,
+                           &color, FALSE);  /* XXX saturate? */
+   }
+
+   free_temp_indexes(emit);
+}
+
+
+/**
+ * Emit instructions for writing a single color output to multiple
+ * color buffers.
+ * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * property is set and the number of render targets is greater than one.
+ * \param fs_color_tmp_index  index of the temp register that holds the
+ *                            color to broadcast.
+ */
+static void
+emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
+                                  unsigned fs_color_tmp_index)
+{
+   const unsigned num_cbufs = emit->key.fs.write_color0_to_n_cbufs;
+   struct tgsi_full_src_register src_color =
+      make_src_temp_reg(fs_color_tmp_index);
+   unsigned cbuf;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+   assert(num_cbufs > 1);
+
+   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
+      const unsigned out_reg = emit->fs.color_out_index[cbuf];
+      struct tgsi_full_dst_register dst_color =
+         make_dst_output_reg(out_reg);
+
+      /* emit_dst_register() looks at this semantic later on, so it is
+       * filled in here for every broadcast target.
+       */
+      emit->info.output_semantic_name[out_reg] = TGSI_SEMANTIC_COLOR;
+
+      /* MOV output.color[cbuf], tempcolor */
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_color,
+                           &src_color, FALSE);  /* XXX saturate? */
+   }
+}
+
+
+/**
+ * Emit extra helper code after the original shader code, but before the
+ * last END/RET instruction.
+ * For vertex shaders this means emitting the extra code to apply the
+ * prescale scale/translation.
+ * For fragment shaders this means the alpha test and/or the color
+ * broadcast, when the key enables them.
+ */
+static boolean
+emit_post_helpers(struct svga_shader_emitter_v10 *emit)
+{
+   switch (emit->unit) {
+   case PIPE_SHADER_VERTEX:
+      emit_vertex_instructions(emit);
+      break;
+
+   case PIPE_SHADER_FRAGMENT: {
+      const unsigned color_tmp = emit->fs.color_tmp_index;
+
+      /* From here on emit_dst_register() must write the real color
+       * output rather than substituting the temporary color register.
+       */
+      emit->fs.color_tmp_index = INVALID_INDEX;
+
+      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
+         emit_alpha_test_instructions(emit, color_tmp);
+      }
+      if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+         emit_broadcast_color_instructions(emit, color_tmp);
+      }
+      break;
+   }
+
+   default:
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate the TGSI tokens into VGPU10 tokens.
+ */
+static boolean
+emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
+                         const struct tgsi_token *tokens)
+{
+   struct tgsi_parse_context parse;
+   boolean ret = TRUE;
+   boolean pre_helpers_emitted = FALSE;
+   unsigned inst_number = 0;
+
+   tgsi_parse_init(&parse, tokens);
+
+   /* Walk the token stream; the first failing token ends the walk */
+   while (ret && !tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         /* The helper declarations/code go out once, right before the
+          * first instruction.
+          */
+         if (!pre_helpers_emitted) {
+            ret = emit_pre_helpers(emit);
+            if (!ret)
+               break;
+            pre_helpers_emitted = TRUE;
+         }
+         ret = emit_vgpu10_instruction(emit, inst_number++,
+                                       &parse.FullToken.FullInstruction);
+         break;
+
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
+         break;
+
+      default:
+         /* other token types are ignored */
+         break;
+      }
+   }
+
+   tgsi_parse_free(&parse);
+   return ret;
+}
+
+
+/**
+ * Emit the first VGPU10 shader tokens.
+ * \return FALSE if either token could not be emitted.
+ */
+static boolean
+emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken ptoken;
+
+   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
+   /* Zero the whole token first so that any bits not covered by the
+    * fields assigned below are well defined when .value is emitted.
+    */
+   ptoken.value = 0;
+   ptoken.majorVersion = 4;
+   ptoken.minorVersion = 0;
+   ptoken.programType = translate_shader_type(emit->unit);
+   if (!emit_dword(emit, ptoken.value))
+      return FALSE;
+
+   /* Second token: total length of shader, in tokens.  We can't fill this
+    * in until we're all done.  Emit zero for now.
+    */
+   return emit_dword(emit, 0);
+}
+
+
+/**
+ * Finish the token stream by patching the shader length into the
+ * second token, which emit_vgpu10_header() left as zero.
+ */
+static boolean
+emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10ProgramToken *tokens;
+
+   /* Replace the second token with total shader length */
+   tokens = (VGPU10ProgramToken *) emit->buf;
+   tokens[1].value = emit_get_num_tokens(emit);
+
+   return TRUE;
+}
+
+
+/**
+ * Modify the FS to read the BCOLORs and use the FACE register
+ * to choose between the front/back colors.
+ * \return the token string produced by tgsi_add_two_side().
+ */
+static const struct tgsi_token *
+transform_fs_twoside(const struct tgsi_token *tokens)
+{
+   /* debug dumps, compiled out */
+   if (0) {
+      debug_printf("Before tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+   tokens = tgsi_add_two_side(tokens);
+   if (0) {
+      debug_printf("After tgsi_add_two_side ------------------\n");
+      tgsi_dump(tokens, 0);
+   }
+   return tokens;
+}
+
+
+/**
+ * Modify the FS to do polygon stipple.
+ * The sampler unit chosen for the stipple texture is recorded in
+ * emit->fs.pstipple_sampler_unit and the matching texture key entry is
+ * set up as a 2D texture with an identity swizzle.
+ * \return the token string produced by
+ *         util_pstipple_create_fragment_shader().
+ */
+static const struct tgsi_token *
+transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
+                      const struct tgsi_token *tokens)
+{
+   const struct tgsi_token *new_tokens;
+   unsigned unit;
+
+   if (0) {
+      debug_printf("Before pstipple ------------------\n");
+      tgsi_dump(tokens,0);
+   }
+
+   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+
+   emit->fs.pstipple_sampler_unit = unit;
+
+   /* Setup texture state for stipple */
+   emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+   if (0) {
+      debug_printf("After pstipple ------------------\n");
+      tgsi_dump(new_tokens, 0);
+   }
+
+   return new_tokens;
+}
+
+/**
+ * Modify the FS to support anti-aliasing point.
+ */ +static const struct tgsi_token * +transform_fs_aapoint(const struct tgsi_token *tokens, + int aa_coord_index) +{ + if (0) { + debug_printf("Before tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens,0); + } + tokens = tgsi_add_aa_point(tokens, aa_coord_index); + if (0) { + debug_printf("After tgsi_add_aa_point ------------------\n"); + tgsi_dump(tokens, 0); + } + return tokens; +} + +/** + * This is the main entrypoint for the TGSI -> VGPU10 translator. + */ +struct svga_shader_variant * +svga_tgsi_vgpu10_translate(struct svga_context *svga, + const struct svga_shader *shader, + const struct svga_compile_key *key, + unsigned unit) +{ + struct svga_shader_variant *variant = NULL; + struct svga_shader_emitter_v10 *emit; + const struct tgsi_token *tokens = shader->tokens; + struct svga_vertex_shader *vs = svga->curr.vs; + struct svga_geometry_shader *gs = svga->curr.gs; + + assert(unit == PIPE_SHADER_VERTEX || + unit == PIPE_SHADER_GEOMETRY || + unit == PIPE_SHADER_FRAGMENT); + + /* These two flags cannot be used together */ + assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); + + /* + * Setup the code emitter + */ + emit = alloc_emitter(); + if (!emit) + return NULL; + + emit->unit = unit; + emit->key = *key; + + emit->vposition.need_prescale = (emit->key.vs.need_prescale || + emit->key.gs.need_prescale); + emit->vposition.tmp_index = INVALID_INDEX; + emit->vposition.so_index = INVALID_INDEX; + emit->vposition.out_index = INVALID_INDEX; + + emit->fs.color_tmp_index = INVALID_INDEX; + emit->fs.face_input_index = INVALID_INDEX; + emit->fs.fragcoord_input_index = INVALID_INDEX; + + emit->gs.prim_id_index = INVALID_INDEX; + + emit->clip_dist_out_index = INVALID_INDEX; + emit->clip_dist_tmp_index = INVALID_INDEX; + emit->clip_dist_so_index = INVALID_INDEX; + emit->clip_vertex_out_index = INVALID_INDEX; + + if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { + emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; + } + + if (unit == PIPE_SHADER_FRAGMENT) { 
+ if (key->fs.light_twoside) { + tokens = transform_fs_twoside(tokens); + } + if (key->fs.pstipple) { + const struct tgsi_token *new_tokens = + transform_fs_pstipple(emit, tokens); + if (tokens != shader->tokens) { + /* free the two-sided shader tokens */ + tgsi_free_tokens(tokens); + } + tokens = new_tokens; + } + if (key->fs.aa_point) { + tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); + } + } + + if (SVGA_DEBUG & DEBUG_TGSI) { + debug_printf("#####################################\n"); + debug_printf("### TGSI Shader %u\n", shader->id); + tgsi_dump(tokens, 0); + } + + /** + * Rescan the header if the token string is different from the one + * included in the shader; otherwise, the header info is already up-to-date + */ + if (tokens != shader->tokens) { + tgsi_scan_shader(tokens, &emit->info); + } else { + emit->info = shader->info; + } + + emit->num_outputs = emit->info.num_outputs; + + if (unit == PIPE_SHADER_FRAGMENT) { + /* Compute FS input remapping to match the output from VS/GS */ + if (gs) { + svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); + } else { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + } else if (unit == PIPE_SHADER_GEOMETRY) { + assert(vs); + svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); + } + + determine_clipping_mode(emit); + + if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { + if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { + /* if there is stream output declarations associated + * with this shader or the shader writes to ClipDistance + * then reserve extra registers for the non-adjusted vertex position + * and the ClipDistance shadow copy + */ + emit->vposition.so_index = emit->num_outputs++; + + if (emit->clip_mode == CLIP_DISTANCE) { + emit->clip_dist_so_index = emit->num_outputs++; + if (emit->info.num_written_clipdistance > 4) + emit->num_outputs++; + } + } + } + + /* + * Do actual shader translation. 
+ */ + if (!emit_vgpu10_header(emit)) { + debug_printf("svga: emit VGPU10 header failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_instructions(emit, tokens)) { + debug_printf("svga: emit VGPU10 instructions failed\n"); + goto cleanup; + } + + if (!emit_vgpu10_tail(emit)) { + debug_printf("svga: emit VGPU10 tail failed\n"); + goto cleanup; + } + + if (emit->register_overflow) { + goto cleanup; + } + + /* + * Create, initialize the 'variant' object. + */ + variant = CALLOC_STRUCT(svga_shader_variant); + if (!variant) + goto cleanup; + + variant->shader = shader; + variant->nr_tokens = emit_get_num_tokens(emit); + variant->tokens = (const unsigned *)emit->buf; + emit->buf = NULL; /* buffer is no longer owed by emitter context */ + memcpy(&variant->key, key, sizeof(*key)); + variant->id = UTIL_BITMASK_INVALID_INDEX; + + /* The extra constant starting offset starts with the number of + * shader constants declared in the shader. + */ + variant->extra_const_start = emit->num_shader_consts[0]; + if (key->gs.wide_point) { + /** + * The extra constant added in the transformed shader + * for inverse viewport scale is to be supplied by the driver. + * So the extra constant starting offset needs to be reduced by 1. + */ + assert(variant->extra_const_start > 0); + variant->extra_const_start--; + } + + variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + + /** keep track in the variant if flat interpolation is used + * for any of the varyings. + */ + variant->uses_flat_interp = emit->uses_flat_interp; + + if (tokens != shader->tokens) { + tgsi_free_tokens(tokens); + } + +cleanup: + free_emitter(emit); + + return variant; +}