X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsvga%2Fsvga_tgsi_vgpu10.c;h=099ede6017dc2b3efda51ded8238e37352368187;hb=b4d47e21d7004412a16c12b762239dbeee665752;hp=423f215e53fa070d48f692a9591c4c754606fd82;hpb=5512f943b87c2ebe01c410cf3af9af8eb31dcb17;p=mesa.git diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 423f215e53f..099ede6017d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -185,6 +185,11 @@ struct svga_shader_emitter_v10 /** Which texture units are doing shadow comparison in the FS code */ unsigned shadow_compare_units; + + unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */ + + unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */ + unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */ } fs; /* For geometry shaders only */ @@ -410,6 +415,9 @@ check_register_index(struct svga_shader_emitter_v10 *emit, emit->register_overflow = TRUE; } break; + case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + /* nothing */ + break; default: assert(0); ; /* nothing */ @@ -644,6 +652,19 @@ translate_opcode(enum tgsi_opcode opcode) return VGPU10_OPCODE_LT; case TGSI_OPCODE_ROUND: return VGPU10_OPCODE_ROUND_NE; + case TGSI_OPCODE_SAMPLE_POS: + /* Note: we never actually get this opcode because there's no GLSL + * function to query multisample resource sample positions. There's + * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the + * position of the current sample in the render target. + */ + /* FALL-THROUGH */ + case TGSI_OPCODE_SAMPLE_INFO: + /* NOTE: we never actually get this opcode because the GLSL compiler + * implements the gl_NumSamples variable with a simple constant in the + * constant buffer. 
+ */ + /* FALL-THROUGH */ default: assert(!"Unexpected TGSI opcode in translate_opcode()"); return VGPU10_OPCODE_NOP; @@ -903,6 +924,15 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, emit_dword(emit, operand0.value); return; } + else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) { + /* Fragment sample mask output */ + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; + emit_dword(emit, operand0.value); + return; + } else if (index == emit->fs.color_out_index[0] && emit->fs.color_tmp_index != INVALID_INDEX) { /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the @@ -965,6 +995,9 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, /** * Translate a src register of a TGSI instruction and emit VGPU10 tokens. + * In quite a few cases, we do register substitution. For example, if + * the TGSI register is the front/back-face register, we replace that with + * a temp register containing a value we computed earlier. 
*/ static void emit_src_register(struct svga_shader_emitter_v10 *emit, @@ -1005,6 +1038,20 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, index = emit->linkage.input_map[index]; } } + else if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == emit->fs.sample_pos_sys_index) { + assert(emit->version >= 41); + /* Current sample position is in a temp register */ + file = TGSI_FILE_TEMPORARY; + index = emit->fs.sample_pos_tmp_index; + } + else { + /* Map the TGSI system value to a VGPU10 input register */ + assert(index < ARRAY_SIZE(emit->system_value_indexes)); + file = TGSI_FILE_INPUT; + index = emit->system_value_indexes[index]; + } + } } else if (emit->unit == PIPE_SHADER_GEOMETRY) { if (file == TGSI_FILE_INPUT) { @@ -1027,7 +1074,9 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } else if (file == TGSI_FILE_SYSTEM_VALUE) { + /* Map the TGSI system value to a VGPU10 input register */ assert(index < ARRAY_SIZE(emit->system_value_indexes)); + file = TGSI_FILE_INPUT; index = emit->system_value_indexes[index]; } } @@ -1195,6 +1244,32 @@ emit_face_register(struct svga_shader_emitter_v10 *emit) } +/** + * Emit tokens for the "rasterizer" register used by the SAMPLE_POS + * instruction. + */ +static void +emit_rasterizer_register(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OperandToken0 operand0; + + /* init */ + operand0.value = 0; + + /* No register index for rasterizer index (there's only one) */ + operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + + emit_dword(emit, operand0.value); +} + + /** * Emit the token for a VGPU10 opcode. * \param saturate clamp result to [0,1]? 
@@ -1384,6 +1459,29 @@ make_src_reg(enum tgsi_file_type file, unsigned index) } +/** + * Create a tgsi_full_src_register with a swizzle such that all four + * vector components have the same scalar value. + */ +static struct tgsi_full_src_register +make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component) +{ + struct tgsi_full_src_register reg; + + assert(component >= TGSI_SWIZZLE_X); + assert(component <= TGSI_SWIZZLE_W); + + memset(&reg, 0, sizeof(reg)); + reg.Register.File = file; + reg.Register.Index = index; + reg.Register.SwizzleX = + reg.Register.SwizzleY = + reg.Register.SwizzleZ = + reg.Register.SwizzleW = component; + return reg; +} + + /** * Create a tgsi_full_src_register for a temporary. */ @@ -1792,7 +1890,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, static unsigned alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) { - const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; + const unsigned n = emit->linkage.input_map_max + 1 + index; assert(index < ARRAY_SIZE(emit->system_value_indexes)); emit->system_value_indexes[index] = n; return n; @@ -1860,13 +1958,25 @@ translate_interpolation(const struct svga_shader_emitter_v10 *emit, case TGSI_INTERPOLATE_CONSTANT: return VGPU10_INTERPOLATION_CONSTANT; case TGSI_INTERPOLATE_LINEAR: - return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? - VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : - VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; + if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { + return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID; + } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && + emit->version >= 41) { + return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE; + } else { + return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; + } + break; case TGSI_INTERPOLATE_PERSPECTIVE: - return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 
- VGPU10_INTERPOLATION_LINEAR_CENTROID : - VGPU10_INTERPOLATION_LINEAR; + if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { + return VGPU10_INTERPOLATION_LINEAR_CENTROID; + } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && + emit->version >= 41) { + return VGPU10_INTERPOLATION_LINEAR_SAMPLE; + } else { + return VGPU10_INTERPOLATION_LINEAR; + } + break; default: assert(!"Unexpected interpolation mode"); return VGPU10_INTERPOLATION_CONSTANT; @@ -2012,7 +2122,9 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit, unsigned index, unsigned size) { assert(opcode0.opcodeType); - assert(operand0.mask); + assert(operand0.mask || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || + (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK)); begin_emit_instruction(emit); emit_dword(emit, opcode0.value); @@ -2074,6 +2186,7 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit, assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || + opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV || opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); assert(operandType == VGPU10_OPERAND_TYPE_INPUT || operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); @@ -2085,13 +2198,17 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit, name == VGPU10_NAME_INSTANCE_ID || name == VGPU10_NAME_VERTEX_ID || name == VGPU10_NAME_PRIMITIVE_ID || - name == VGPU10_NAME_IS_FRONT_FACE); + name == VGPU10_NAME_IS_FRONT_FACE || + name == VGPU10_NAME_SAMPLE_INDEX); + assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || interpMode == VGPU10_INTERPOLATION_CONSTANT || interpMode == VGPU10_INTERPOLATION_LINEAR || interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || - interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID); + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID || + interpMode == 
VGPU10_INTERPOLATION_LINEAR_SAMPLE || + interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE); check_register_index(emit, opcodeType, index); @@ -2178,7 +2295,32 @@ emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; - operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; + operand0.mask = 0; + + emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); +} + + +/** + * Emit the declaration for the fragment sample mask/coverage output. + */ +static void +emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10NameToken name_token; + + assert(emit->unit == PIPE_SHADER_FRAGMENT); + assert(emit->version >= 41); + + opcode0.value = operand0.value = name_token.value = 0; + + opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; + operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.mask = 0; emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); } @@ -2216,8 +2358,33 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT_MASK_X, VGPU10_INTERPOLATION_UNDEFINED); break; + case TGSI_SEMANTIC_SAMPLEID: + assert(emit->unit == PIPE_SHADER_FRAGMENT); + emit->fs.sample_id_sys_index = index; + index = alloc_system_value_index(emit, index); + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, + VGPU10_OPERAND_TYPE_INPUT, + VGPU10_OPERAND_INDEX_1D, + index, 1, + VGPU10_NAME_SAMPLE_INDEX, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_X, + VGPU10_INTERPOLATION_CONSTANT); + break; + case TGSI_SEMANTIC_SAMPLEPOS: + /* This system value contains the position of 
the current sample + * when using per-sample shading. We implement this by calling + * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample + * index as the argument. See emit_sample_position_instructions(). + */ + assert(emit->version >= 41); + emit->fs.sample_pos_sys_index = index; + index = alloc_system_value_index(emit, index); + break; default: - ; /* XXX */ + debug_printf("unexpected sytem value semantic index %u\n", + semantic_name); } } @@ -2390,6 +2557,12 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit) interpolationMode = VGPU10_INTERPOLATION_CONSTANT; name = VGPU10_NAME_PRIMITIVE_ID; } + else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { + /* sample index / ID */ + type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; + interpolationMode = VGPU10_INTERPOLATION_CONSTANT; + name = VGPU10_NAME_SAMPLE_INDEX; + } else { /* general fragment input */ type = VGPU10_OPCODE_DCL_INPUT_PS; @@ -2553,6 +2726,10 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) /* Fragment depth output */ emit_fragdepth_output_declaration(emit); } + else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { + /* Fragment sample mask output */ + emit_samplemask_output_declaration(emit); + } else { assert(!"Bad output semantic name"); } @@ -2747,6 +2924,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) emit->fs.fragcoord_tmp_index = total_temps; total_temps += 1; } + + if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { + /* Allocate a temp for the sample position */ + emit->fs.sample_pos_tmp_index = total_temps++; + } } for (i = 0; i < emit->num_address_regs; i++) { @@ -2978,12 +3160,20 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) /** - * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. + * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. 
*/ static unsigned tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, + unsigned num_samples, boolean is_array) { + if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { + target = TGSI_TEXTURE_2D; + } + else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) { + target = TGSI_TEXTURE_2D_ARRAY; + } + switch (target) { case TGSI_TEXTURE_BUFFER: return VGPU10_RESOURCE_DIMENSION_BUFFER; @@ -3017,7 +3207,8 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; + return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; default: assert(!"Unexpected resource type"); return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; @@ -3069,6 +3260,7 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; opcode0.resourceDimension = tgsi_texture_to_resource_dimension(emit->sampler_target[i], + emit->key.tex[i].num_samples, emit->key.tex[i].is_array); opcode0.sampleCount = emit->key.tex[i].num_samples; operand0.value = 0; @@ -4073,6 +4265,30 @@ emit_lit(struct svga_shader_emitter_v10 *emit, } +/** + * Emit Level Of Detail Query (LODQ) instruction. + */ +static boolean +emit_lodq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[1].Register.Index; + + assert(emit->version >= 41); + + /* LOD dst, coord, resource, sampler */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &inst->Src[0]); /* coord */ + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + end_emit_instruction(emit); + + return TRUE; +} + + /** * Emit code for TGSI_OPCODE_LOG instruction. 
*/ @@ -4751,7 +4967,7 @@ setup_texcoord(struct svga_shader_emitter_v10 *emit, unsigned unit, const struct tgsi_full_src_register *coord) { - if (emit->key.tex[unit].unnormalized) { + if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) { unsigned scale_index = emit->texcoord_scale_index[unit]; unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); @@ -5114,6 +5330,60 @@ emit_tex(struct svga_shader_emitter_v10 *emit, return TRUE; } +/** + * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) + */ +static boolean +emit_tg4(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[2].Register.Index; + struct tgsi_full_src_register src; + int offsets[3]; + + /* check that the sampler returns a float */ + if (!is_valid_tex_instruction(emit, inst)) + return TRUE; + + /* Only a single channel is supported in SM4_1 and we report + * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. + * Only the 0th component will be gathered. 
+ */ + switch (emit->key.tex[unit].swizzle_r) { + case PIPE_SWIZZLE_X: + get_texel_offsets(emit, inst, offsets); + src = setup_texcoord(emit, unit, &inst->Src[0]); + + /* Gather dst, coord, resource, sampler */ + begin_emit_instruction(emit); + emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, + inst->Instruction.Saturate, offsets); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &src); + emit_resource_register(emit, unit); + emit_sampler_register(emit, unit); + end_emit_instruction(emit); + break; + case PIPE_SWIZZLE_W: + case PIPE_SWIZZLE_1: + src = make_immediate_reg_float(emit, 1.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &inst->Dst[0], &src, FALSE); + break; + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_0: + default: + src = make_immediate_reg_float(emit, 0.0); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, + &inst->Dst[0], &src, FALSE); + break; + } + + return TRUE; +} + + /** * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) @@ -5268,7 +5538,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { const uint unit = inst->Src[1].Register.Index; - const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture); + const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) + && emit->key.tex[unit].num_samples > 1; int offsets[3]; struct tex_swizzle_info swz_info; @@ -5277,6 +5548,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit, get_texel_offsets(emit, inst, offsets); if (msaa) { + assert(emit->key.tex[unit].num_samples > 1); + /* Fetch one sample from an MSAA texture */ struct tgsi_full_src_register sampleIndex = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); @@ -5636,6 +5909,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_lg2(emit, inst); case TGSI_OPCODE_LIT: return emit_lit(emit, inst); + case TGSI_OPCODE_LODQ: + return emit_lodq(emit, inst); case TGSI_OPCODE_LOG: return emit_log(emit, inst); case 
TGSI_OPCODE_LRP: @@ -5668,6 +5943,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_issg(emit, inst); case TGSI_OPCODE_TEX: return emit_tex(emit, inst); + case TGSI_OPCODE_TG4: + return emit_tg4(emit, inst); case TGSI_OPCODE_TEX2: return emit_tex2(emit, inst); case TGSI_OPCODE_TXP: @@ -6010,6 +6287,55 @@ emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) } +/** + * Emit the extra code to get the current sample position value and + * put it into a temp register. + */ +static void +emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) +{ + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { + assert(emit->version >= 41); + + struct tgsi_full_dst_register tmp_dst = + make_dst_temp_reg(emit->fs.sample_pos_tmp_index); + struct tgsi_full_src_register half = + make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0); + + struct tgsi_full_src_register tmp_src = + make_src_temp_reg(emit->fs.sample_pos_tmp_index); + struct tgsi_full_src_register sample_index_reg = + make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE, + emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X); + + /* The first src register is a shader resource (if we want a + * multisampled resource sample position) or the rasterizer register + * (if we want the current sample position in the color buffer). We + * want the latter. 
+ */ + + /* SAMPLE_POS dst, RASTERIZER, sampleIndex */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE); + emit_dst_register(emit, &tmp_dst); + emit_rasterizer_register(emit); + emit_src_register(emit, &sample_index_reg); + end_emit_instruction(emit); + + /* Convert from D3D coords to GL coords by adding 0.5 bias */ + /* ADD dst, dst, half */ + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE); + emit_dst_register(emit, &tmp_dst); + emit_src_register(emit, &tmp_src); + emit_src_register(emit, &half); + end_emit_instruction(emit); + } +} + + /** * Emit extra instructions to adjust VS inputs/attributes. This can * mean casting a vertex attribute from int to float or setting the @@ -6229,6 +6555,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) if (emit->unit == PIPE_SHADER_FRAGMENT) { emit_frontface_instructions(emit); emit_fragcoord_instructions(emit); + emit_sample_position_instructions(emit); } else if (emit->unit == PIPE_SHADER_VERTEX) { emit_vertex_attrib_instructions(emit); @@ -6625,6 +6952,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->fs.color_tmp_index = INVALID_INDEX; emit->fs.face_input_index = INVALID_INDEX; emit->fs.fragcoord_input_index = INVALID_INDEX; + emit->fs.sample_id_sys_index = INVALID_INDEX; + emit->fs.sample_pos_sys_index = INVALID_INDEX; emit->gs.prim_id_index = INVALID_INDEX; @@ -6686,6 +7015,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); } + /* Since vertex shader does not need to go through the linker to + * establish the input map, we need to make sure the highest index + * of input registers is set properly here. 
+ */ + emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max, + emit->info.file_max[TGSI_FILE_INPUT]); + determine_clipping_mode(emit); if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { @@ -6730,7 +7066,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, /* * Create, initialize the 'variant' object. */ - variant = svga_new_shader_variant(svga); + variant = svga_new_shader_variant(svga, unit); if (!variant) goto cleanup; @@ -6771,6 +7107,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, variant->fs_shadow_compare_units = emit->fs.shadow_compare_units; + variant->fs_shadow_compare_units = emit->fs.shadow_compare_units; + if (tokens != shader->tokens) { tgsi_free_tokens(tokens); }