X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsvga%2Fsvga_tgsi_insn.c;h=c7804b2680f4edc1022b64fa26a007c45f3c3c05;hb=81371a59093d59963a43b7f1becbed9d3c657e45;hp=4d78e96375cf756282913affb988caf6854b77d0;hpb=a15eb1967671f9c2f58d22c5ef8f4b53806f9597;p=mesa.git diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 4d78e96375c..c7804b2680f 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -29,6 +29,7 @@ #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pstipple.h" #include "svga_tgsi_emit.h" #include "svga_context.h" @@ -38,13 +39,11 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); -static unsigned -translate_opcode(uint opcode) +static SVGA3dShaderOpCodeType +translate_opcode(enum tgsi_opcode opcode) { switch (opcode) { - case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; - case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; @@ -54,7 +53,6 @@ translate_opcode(uint opcode) case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; - case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; default: assert(!"svga: unexpected opcode in translate_opcode()"); return SVGA3DOP_LAST_INST; @@ -62,8 +60,8 @@ translate_opcode(uint opcode) } -static unsigned -translate_file(unsigned file) +static SVGA3dShaderRegType +translate_file(enum tgsi_file_type file) { switch (file) { case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; @@ -99,6 +97,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; 
+ emit->num_output_writes++; break; default: @@ -165,7 +164,7 @@ scalar(struct src_register src, unsigned comp) static boolean svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -178,7 +177,7 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) static int svga_arl_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -287,8 +286,10 @@ static SVGA3dShaderDestToken get_temp( struct svga_shader_emitter *emit ) { int i = emit->nr_hw_temp + emit->internal_temp_count++; - assert(i < SVGA3D_TEMPREG_MAX); - i = MIN2(i, SVGA3D_TEMPREG_MAX - 1); + if (i >= SVGA3D_TEMPREG_MAX) { + debug_warn_once("svga: Too many temporary registers used in shader\n"); + i = SVGA3D_TEMPREG_MAX - 1; + } return dst_register( SVGA3DREG_TEMP, i ); } @@ -666,6 +667,7 @@ submit_op4(struct svga_shader_emitter *emit, * in one slot at least: */ assert(type1 == SVGA3DREG_SAMPLER); + (void) type1; if (type0 == SVGA3DREG_CONST && ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || @@ -764,7 +766,7 @@ emit_def_const(struct svga_shader_emitter *emit, } if (!emit_instruction(emit, opcode) || - !svga_shader_emit_dwords( emit, def.values, Elements(def.values))) + !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values))) return FALSE; return TRUE; @@ -857,8 +859,20 @@ create_common_immediate( struct svga_shader_emitter *emit ) if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0.0f, 0.5f, -1.0f, 1.0f )) return FALSE; + emit->common_immediate_idx[0] = idx; + idx++; + + /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ + if (emit->key.vs.adjust_attrib_range) { + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 2.0f, 0.0f, 0.0f, 0.0f )) + return FALSE; + 
emit->common_immediate_idx[1] = idx; + } + else { + emit->common_immediate_idx[1] = -1; + } - emit->common_immediate_idx = idx; emit->created_common_immediate = TRUE; return TRUE; @@ -887,7 +901,7 @@ common_immediate_swizzle(float value) /** - * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5 + * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 */ static struct src_register get_immediate(struct svga_shader_emitter *emit, @@ -898,8 +912,8 @@ get_immediate(struct svga_shader_emitter *emit, unsigned sz = common_immediate_swizzle(z); unsigned sw = common_immediate_swizzle(w); assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), sx, sy, sz, sw); } @@ -911,9 +925,9 @@ static struct src_register get_zero_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 0, 0, 0, 0); } @@ -925,9 +939,9 @@ static struct src_register get_one_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 3, 3, 3, 3); } @@ -939,12 +953,27 @@ static struct src_register get_half_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + return 
swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 1, 1, 1, 1); } +/** + * returns {2, 2, 2, 2} immediate + */ +static struct src_register +get_two_immediate( struct svga_shader_emitter *emit ) +{ + /* Note we use the second common immediate here */ + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[1] >= 0); + return swizzle(src_register( SVGA3DREG_CONST, + emit->common_immediate_idx[1]), + 0, 0, 0, 0); +} + + /** * returns the loop const */ @@ -987,7 +1016,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) struct src_register reg; /* the width/height indexes start right after constants */ - idx = emit->key.fkey.tex[sampler_num].width_height_idx + + idx = emit->key.tex[sampler_num].width_height_idx + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; reg = src_register( SVGA3DREG_CONST, idx ); @@ -1145,7 +1174,7 @@ emit_div(struct svga_shader_emitter *emit, const struct src_register src1 = translate_src_register(emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); - int i; + unsigned i; /* For each enabled element, perform a RCP instruction. Note that * RCP is scalar in SVGA3D: @@ -1207,73 +1236,6 @@ emit_dp2(struct svga_shader_emitter *emit, } -/** - * Translate the following TGSI DPH instruction. - * DPH DST, SRC1, SRC2 - * To the following SVGA3D instruction sequence. 
- * DP3 TMP, SRC1, SRC2 - * ADD DST, TMP, SRC2.wwww - */ -static boolean -emit_dph(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn ) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = - translate_src_register(emit, &insn->Src[1]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* DP3 TMP, SRC1, SRC2 */ - if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) - return FALSE; - - src1 = scalar(src1, TGSI_SWIZZLE_W); - - /* ADD DST, TMP, SRC2.wwww */ - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src( temp ), src1 )) - return FALSE; - - return TRUE; -} - - -/** - * Translate the following TGSI DST instruction. - * NRM DST, SRC - * To the following SVGA3D instruction sequence. - * DP3 TMP, SRC, SRC - * RSQ TMP, TMP - * MUL DST, SRC, TMP - */ -static boolean -emit_nrm(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = - translate_src_register(emit, &insn->Src[0]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* DP3 TMP, SRC, SRC */ - if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 )) - return FALSE; - - /* RSQ TMP, TMP */ - if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp ))) - return FALSE; - - /* MUL DST, SRC, TMP */ - if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, - src0, src( temp ))) - return FALSE; - - return TRUE; -} - - /** * Sine / Cosine helper function. */ @@ -1287,29 +1249,6 @@ do_emit_sincos(struct svga_shader_emitter *emit, } -/** - * Translate/emit a TGSI SIN, COS or CSC instruction. 
- */ -static boolean -emit_sincos(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register(emit, &insn->Src[0]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* SCS TMP SRC */ - if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) - return FALSE; - - /* MOV DST TMP */ - if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) - return FALSE; - - return TRUE; -} - - /** * Translate TGSI SIN instruction into: * SCS TMP SRC @@ -1406,30 +1345,6 @@ emit_ssg(struct svga_shader_emitter *emit, } -/** - * Translate/emit TGSI SUB instruction as: - * ADD DST, SRC0, negate(SRC1) - */ -static boolean -emit_sub(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - - src1 = negate(src1); - - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src0, src1 )) - return FALSE; - - return TRUE; -} - - /** * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). 
*/ @@ -1520,7 +1435,7 @@ same_register(struct src_register r1, struct src_register r2) */ static boolean emit_conditional(struct svga_shader_emitter *emit, - unsigned compare_func, + enum pipe_compare_func compare_func, SVGA3dShaderDestToken dst, struct src_register src0, struct src_register src1, @@ -1528,8 +1443,7 @@ emit_conditional(struct svga_shader_emitter *emit, struct src_register fail) { SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); - SVGA3dShaderInstToken setp_token, mov_token; - setp_token = inst_token( SVGA3DOP_SETP ); + SVGA3dShaderInstToken setp_token; switch (compare_func) { case PIPE_FUNC_NEVER: @@ -1537,22 +1451,22 @@ emit_conditional(struct svga_shader_emitter *emit, dst, fail ); break; case PIPE_FUNC_LESS: - setp_token.control = SVGA3DOPCOMP_LT; + setp_token = inst_token_setp(SVGA3DOPCOMP_LT); break; case PIPE_FUNC_EQUAL: - setp_token.control = SVGA3DOPCOMP_EQ; + setp_token = inst_token_setp(SVGA3DOPCOMP_EQ); break; case PIPE_FUNC_LEQUAL: - setp_token.control = SVGA3DOPCOMP_LE; + setp_token = inst_token_setp(SVGA3DOPCOMP_LE); break; case PIPE_FUNC_GREATER: - setp_token.control = SVGA3DOPCOMP_GT; + setp_token = inst_token_setp(SVGA3DOPCOMP_GT); break; case PIPE_FUNC_NOTEQUAL: - setp_token.control = SVGA3DOPCOMPC_NE; + setp_token = inst_token_setp(SVGA3DOPCOMPC_NE); break; case PIPE_FUNC_GEQUAL: - setp_token.control = SVGA3DOPCOMP_GE; + setp_token = inst_token_setp(SVGA3DOPCOMP_GE); break; case PIPE_FUNC_ALWAYS: return submit_op1( emit, inst_token( SVGA3DOP_MOV ), @@ -1575,11 +1489,8 @@ emit_conditional(struct svga_shader_emitter *emit, src0, src1 )) return FALSE; - mov_token = inst_token( SVGA3DOP_MOV ); - /* MOV dst, fail */ - if (!submit_op1( emit, mov_token, dst, - fail )) + if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail)) return FALSE; /* MOV dst, pass (predicated) @@ -1587,9 +1498,9 @@ emit_conditional(struct svga_shader_emitter *emit, * Note that the predicate reg (and possible modifiers) is passed * as 
the first source argument. */ - mov_token.predicated = 1; - if (!submit_op2( emit, mov_token, dst, - src( pred_reg ), pass )) + if (!submit_op2(emit, + inst_token_predicated(SVGA3DOP_MOV), dst, + src(pred_reg), pass)) return FALSE; return TRUE; @@ -1605,7 +1516,7 @@ emit_conditional(struct svga_shader_emitter *emit, */ static boolean emit_select(struct svga_shader_emitter *emit, - unsigned compare_func, + enum pipe_compare_func compare_func, SVGA3dShaderDestToken dst, struct src_register src0, struct src_register src1 ) @@ -1733,7 +1644,7 @@ emit_tex2(struct svga_shader_emitter *emit, texcoord = translate_src_register( emit, &insn->Src[0] ); sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[sampler.base.num].unnormalized || + if (emit->key.tex[sampler.base.num].unnormalized || emit->dynamic_branching_level > 0) tmp = get_temp( emit ); @@ -1765,7 +1676,7 @@ emit_tex2(struct svga_shader_emitter *emit, /* Explicit normalization of texcoords: */ - if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + if (emit->key.tex[sampler.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ @@ -1830,15 +1741,15 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; unsigned srcSwizzle[4]; unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; - int i; + unsigned i; /* build writemasks and srcSwizzle terms */ for (i = 0; i < 4; i++) { - if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) { + if (swizzleIn[i] == PIPE_SWIZZLE_0) { srcSwizzle[i] = TGSI_SWIZZLE_X + i; zeroWritemask |= (1 << i); } - else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) { + else if (swizzleIn[i] == PIPE_SWIZZLE_1) { srcSwizzle[i] = TGSI_SWIZZLE_X + i; oneWritemask |= (1 << i); } @@ -1901,16 +1812,16 @@ emit_tex(struct svga_shader_emitter *emit, const unsigned unit = src1.base.num; /* check for shadow samplers */ - boolean compare = 
(emit->key.fkey.tex[unit].compare_mode == + boolean compare = (emit->key.tex[unit].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE); /* texture swizzle */ - boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || - emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || - emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || - emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); + boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W); - boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE; + boolean saturate = insn->Instruction.Saturate; /* If doing compare processing or tex swizzle or saturation, we need to put * the fetched color into a temporary so it can be used as a source later on. @@ -1975,7 +1886,7 @@ emit_tex(struct svga_shader_emitter *emit, /* Compare texture sample value against R component of texcoord */ if (!emit_select(emit, - emit->key.fkey.tex[unit].compare_func, + emit->key.tex[unit].compare_func, writemask( dst2, TGSI_WRITEMASK_XYZ ), r_coord, tex_src_x)) @@ -2001,10 +1912,10 @@ emit_tex(struct svga_shader_emitter *emit, /* swizzle from tex_result to dst (handles saturation too, if any) */ emit_tex_swizzle(emit, dst, src(tex_result), - emit->key.fkey.tex[unit].swizzle_r, - emit->key.fkey.tex[unit].swizzle_g, - emit->key.fkey.tex[unit].swizzle_b, - emit->key.fkey.tex[unit].swizzle_a); + emit->key.tex[unit].swizzle_r, + emit->key.tex[unit].swizzle_g, + emit->key.tex[unit].swizzle_b, + emit->key.tex[unit].swizzle_a); } return TRUE; @@ -2057,7 +1968,7 @@ emit_brk(struct svga_shader_emitter *emit, */ static boolean emit_scalar_op1(struct svga_shader_emitter *emit, - unsigned opcode, + SVGA3dShaderOpCodeType opcode, const struct tgsi_full_instruction *insn) { SVGA3dShaderInstToken inst; @@ -2079,7 +1990,7 @@ emit_scalar_op1(struct 
svga_shader_emitter *emit, */ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, - unsigned opcode, + SVGA3dShaderOpCodeType opcode, const struct tgsi_full_instruction *insn) { const struct tgsi_full_src_register *src = insn->Src; @@ -2111,6 +2022,29 @@ emit_simple_instruction(struct svga_shader_emitter *emit, } +/** + * TGSI_OPCODE_MOV is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. + */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + /** * Translate/emit TGSI DDX, DDY instructions. */ @@ -2136,7 +2070,7 @@ emit_deriv(struct svga_shader_emitter *emit, return TRUE; } else { - unsigned opcode; + SVGA3dShaderOpCodeType opcode; const struct tgsi_full_src_register *reg = &insn->Src[0]; SVGA3dShaderInstToken inst; SVGA3dShaderDestToken dst; @@ -2238,63 +2172,6 @@ emit_pow(struct svga_shader_emitter *emit, } -/** - * Translate/emit TGSI XPD (vector cross product) instruction. 
- */ -static boolean -emit_xpd(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - const struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - boolean need_dst_tmp = FALSE; - - /* XPD can only output to a temporary */ - if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) - need_dst_tmp = TRUE; - - /* The dst reg must not be the same as src0 or src1*/ - if (alias_src_dst(src0, dst) || - alias_src_dst(src1, dst)) - need_dst_tmp = TRUE; - - if (need_dst_tmp) { - SVGA3dShaderDestToken tmp = get_temp( emit ); - - /* Obey DX9 restrictions on mask: - */ - tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ; - - if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1)) - return FALSE; - - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; - } - else { - if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1)) - return FALSE; - } - - /* Need to emit 1.0 to dst.w? - */ - if (dst.mask & TGSI_WRITEMASK_W) { - struct src_register one = get_one_immediate( emit ); - - if (!submit_op1(emit, - inst_token( SVGA3DOP_MOV ), - writemask(dst, TGSI_WRITEMASK_W), - one)) - return FALSE; - } - - return TRUE; -} - - /** * Emit a LRP (linear interpolation) instruction. 
*/ @@ -2558,14 +2435,8 @@ emit_lit(struct svga_shader_emitter *emit, */ { SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); - SVGA3dShaderInstToken setp_token, mov_token; struct src_register predsrc; - setp_token = inst_token( SVGA3DOP_SETP ); - mov_token = inst_token( SVGA3DOP_MOV ); - - setp_token.control = SVGA3DOPCOMP_GT; - /* D3D vs GL semantics: */ if (0) @@ -2574,7 +2445,9 @@ emit_lit(struct svga_shader_emitter *emit, predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */ /* SETP src0.xxyy, GT, {0}.x */ - if (!submit_op2( emit, setp_token, pred_reg, + if (!submit_op2( emit, + inst_token_setp(SVGA3DOPCOMP_GT), + pred_reg, predsrc, get_zero_immediate(emit))) return FALSE; @@ -2590,8 +2463,8 @@ emit_lit(struct svga_shader_emitter *emit, * as the first source argument. */ if (dst.mask & TGSI_WRITEMASK_YZ) { - mov_token.predicated = 1; - if (!submit_op2( emit, mov_token, + if (!submit_op2( emit, + inst_token_predicated(SVGA3DOP_MOV), writemask(dst, TGSI_WRITEMASK_YZ), src( pred_reg ), src( tmp ) )) return FALSE; @@ -2855,7 +2728,7 @@ emit_call(struct svga_shader_emitter *emit, break; } - if (emit->nr_labels == Elements(emit->label)) + if (emit->nr_labels == ARRAY_SIZE(emit->label)) return FALSE; if (i == emit->nr_labels) { @@ -2938,21 +2811,12 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_DP2: return emit_dp2( emit, insn ); - case TGSI_OPCODE_DPH: - return emit_dph( emit, insn ); - - case TGSI_OPCODE_NRM: - return emit_nrm( emit, insn ); - case TGSI_OPCODE_COS: return emit_cos( emit, insn ); case TGSI_OPCODE_SIN: return emit_sin( emit, insn ); - case TGSI_OPCODE_SCS: - return emit_sincos( emit, insn ); - case TGSI_OPCODE_END: /* TGSI always finishes the main func with an END */ return emit_end( emit ); @@ -2981,9 +2845,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SLE: return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); - case TGSI_OPCODE_SUB: - return emit_sub( emit, insn ); - case 
TGSI_OPCODE_POW: return emit_pow( emit, insn ); @@ -3019,7 +2880,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, /* These aren't actually used by any of the frontends we care * about: */ - case TGSI_OPCODE_CLAMP: case TGSI_OPCODE_AND: case TGSI_OPCODE_OR: case TGSI_OPCODE_I2F: @@ -3043,9 +2903,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_BRK: return emit_brk( emit, insn ); - case TGSI_OPCODE_XPD: - return emit_xpd( emit, insn ); - case TGSI_OPCODE_KILL: return emit_kill( emit, insn ); @@ -3061,9 +2918,13 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { - unsigned opcode = translate_opcode(insn->Instruction.Opcode); + SVGA3dShaderOpCodeType opcode = + translate_opcode(insn->Instruction.Opcode); if (opcode == SVGA3DOP_LAST_INST) return FALSE; @@ -3130,7 +2991,7 @@ make_immediate(struct svga_shader_emitter *emit, static boolean emit_vs_preamble(struct svga_shader_emitter *emit) { - if (!emit->key.vkey.need_prescale) { + if (!emit->key.vs.need_prescale) { if (!make_immediate( emit, 0, 0, .5, .5, &emit->imm_0055)) return FALSE; @@ -3207,7 +3068,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) * logicop workaround. 
*/ if (emit->unit == PIPE_SHADER_FRAGMENT && - emit->key.fkey.white_fragments) { + emit->key.fs.white_fragments) { struct src_register one = get_one_immediate(emit); if (!submit_op1( emit, @@ -3217,7 +3078,7 @@ emit_ps_postamble(struct svga_shader_emitter *emit) return FALSE; } else if (emit->unit == PIPE_SHADER_FRAGMENT && - i < emit->key.fkey.write_color0_to_n_cbufs) { + i < emit->key.fs.write_color0_to_n_cbufs) { /* Write temp color output [0] to true output [i] */ if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), emit->true_color_output[i], @@ -3261,7 +3122,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit) /* Need to perform various manipulations on vertex position to cope * with the different GL and D3D clip spaces. */ - if (emit->key.vkey.need_prescale) { + if (emit->key.vs.need_prescale) { SVGA3dShaderDestToken temp_pos = emit->temp_pos; SVGA3dShaderDestToken depth = emit->depth_pos; SVGA3dShaderDestToken pos = emit->true_pos; @@ -3360,7 +3221,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) struct src_register back[2]; SVGA3dShaderDestToken color[2]; int count = emit->internal_color_count; - int i; + unsigned i; SVGA3dShaderInstToken if_token; if (count == 0) @@ -3389,7 +3250,7 @@ emit_light_twoside(struct svga_shader_emitter *emit) if_token = inst_token( SVGA3DOP_IFC ); - if (emit->key.fkey.front_ccw) + if (emit->key.fs.front_ccw) if_token.control = SVGA3DOPCOMP_LT; else if_token.control = SVGA3DOPCOMP_GT; @@ -3440,7 +3301,7 @@ emit_frontface(struct svga_shader_emitter *emit) temp = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); - if (emit->key.fkey.front_ccw) { + if (emit->key.fs.front_ccw) { pass = get_zero_immediate(emit); fail = get_one_immediate(emit); } else { @@ -3476,12 +3337,12 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) assert(emit->inverted_texcoords & (1 << unit)); - assert(unit < Elements(emit->ps_true_texcoord)); + assert(unit < ARRAY_SIZE(emit->ps_true_texcoord)); - assert(unit < 
Elements(emit->ps_inverted_texcoord_input)); + assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input)); assert(emit->ps_inverted_texcoord_input[unit] - < Elements(emit->input_map)); + < ARRAY_SIZE(emit->input_map)); /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */ if (!submit_op3(emit, @@ -3503,6 +3364,95 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) } +/** + * Emit code to adjust vertex shader inputs/attributes: + * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs). + * - Set attrib W component = 1. + */ +static boolean +emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) +{ + unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | + emit->key.vs.adjust_attrib_w_1); + + while (adjust_mask) { + /* Adjust vertex attrib range and/or set W component = 1 */ + const unsigned index = u_bit_scan(&adjust_mask); + struct src_register tmp; + + /* allocate a temp reg */ + tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); + emit->nr_hw_temp++; + + if (emit->key.vs.adjust_attrib_range & (1 << index)) { + /* The vertex input/attribute is supposed to be a signed value in + * the range [-1,1] but we actually fetched/converted it to the + * range [0,1]. This most likely happens when the app specifies a + * signed byte attribute but we interpreted it as unsigned bytes. + * See also svga_translate_vertex_format(). + * + * Here, we emit some extra instructions to adjust + * the attribute values from [0,1] to [-1,1]. + * + * The adjustment we implement is: + * new_attrib = attrib * 2.0; + * if (attrib >= 0.5) + * new_attrib = new_attrib - 2.0; + * This isn't exactly right (it's off by a bit or so) but close enough. 
+ */ + SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); + + /* tmp = attrib * 2.0 */ + if (!submit_op2(emit, + inst_token(SVGA3DOP_MUL), + dst(tmp), + emit->input_map[index], + get_two_immediate(emit))) + return FALSE; + + /* pred = (attrib >= 0.5) */ + if (!submit_op2(emit, + inst_token_setp(SVGA3DOPCOMP_GE), + pred_reg, + emit->input_map[index], /* vert attrib */ + get_half_immediate(emit))) /* 0.5 */ + return FALSE; + + /* sub(pred) tmp, tmp, 2.0 */ + if (!submit_op3(emit, + inst_token_predicated(SVGA3DOP_SUB), + dst(tmp), + src(pred_reg), + tmp, + get_two_immediate(emit))) + return FALSE; + } + else { + /* just copy the vertex input attrib to the temp register */ + if (!submit_op1(emit, + inst_token(SVGA3DOP_MOV), + dst(tmp), + emit->input_map[index])) + return FALSE; + } + + if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { + /* move 1 into W position of tmp */ + if (!submit_op1(emit, + inst_token(SVGA3DOP_MOV), + writemask(dst(tmp), TGSI_WRITEMASK_W), + get_one_immediate(emit))) + return FALSE; + } + + /* Reassign the input_map entry to the new tmp register */ + emit->input_map[index] = tmp; + } + + return TRUE; +} + + /** * Determine if we need to create the "common" immediate value which is * used for generating useful vector constants such as {0,0,0,0} and @@ -3516,10 +3466,10 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) unsigned i; if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.light_twoside) + if (emit->key.fs.light_twoside) return TRUE; - if (emit->key.fkey.white_fragments) + if (emit->key.fs.white_fragments) return TRUE; if (emit->emit_frontface) @@ -3533,25 +3483,27 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) if (emit->inverted_texcoords) return TRUE; - /* look for any PIPE_SWIZZLE_ZERO/ONE terms */ - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_g > 
PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) + /* look for any PIPE_SWIZZLE_0/ONE terms */ + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W) return TRUE; } - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].compare_mode + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) return TRUE; } } - - if (emit->unit == PIPE_SHADER_VERTEX) { + else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) + return TRUE; } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || @@ -3567,7 +3519,6 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1) return TRUE; @@ -3596,7 +3547,7 @@ static boolean pre_parse_add_indirect( struct svga_shader_emitter *emit, int num, int current_arl) { - int i; + unsigned i; assert(num < 0); for (i = 0; i < emit->num_arl_consts; ++i) { @@ -3695,10 +3646,13 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) } if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!svga_shader_emit_samplers_decl( emit )) + return FALSE; + if (!emit_ps_preamble( emit )) return FALSE; - if (emit->key.fkey.light_twoside) { + if (emit->key.fs.light_twoside) { if (!emit_light_twoside( emit )) return FALSE; } @@ -3711,6 +3665,15 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) return FALSE; } } + 
else { + assert(emit->unit == PIPE_SHADER_VERTEX); + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) { /* either mask requires the input fixup; fail only if emission fails */ + if (!emit_adjusted_vertex_attribs(emit)) { + return FALSE; + } + } + } return TRUE; } @@ -3725,10 +3688,31 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, const struct tgsi_token *tokens) { struct tgsi_parse_context parse; + const struct tgsi_token *new_tokens = NULL; boolean ret = TRUE; boolean helpers_emitted = FALSE; unsigned line_nr = 0; + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { + unsigned unit; + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + if (new_tokens) { + /* Setup texture state for stipple */ + emit->sampler_target[unit] = TGSI_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + emit->pstipple_sampler_unit = unit; + + tokens = new_tokens; + } + } + tgsi_parse_init( &parse, tokens ); emit->internal_imm_count = 0; @@ -3795,5 +3779,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, done: tgsi_parse_free( &parse ); + if (new_tokens) { + tgsi_free_tokens(new_tokens); + } + return ret; }