X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsvga%2Fsvga_tgsi_insn.c;h=c7804b2680f4edc1022b64fa26a007c45f3c3c05;hb=81371a59093d59963a43b7f1becbed9d3c657e45;hp=2143546229370abd8829d0f0dde751ca36bf3348;hpb=384fd64ab1533df8256600733a1fc8413af3514a;p=mesa.git diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 21435462293..c7804b2680f 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -29,6 +29,7 @@ #include "tgsi/tgsi_parse.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_pstipple.h" #include "svga_tgsi_emit.h" #include "svga_context.h" @@ -38,13 +39,11 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); -static unsigned -translate_opcode(uint opcode) +static SVGA3dShaderOpCodeType +translate_opcode(enum tgsi_opcode opcode) { switch (opcode) { - case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; - case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; @@ -54,17 +53,15 @@ translate_opcode(uint opcode) case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; - case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; default: - debug_printf("Unkown opcode %u\n", opcode); - assert( 0 ); + assert(!"svga: unexpected opcode in translate_opcode()"); return SVGA3DOP_LAST_INST; } } -static unsigned -translate_file(unsigned file) +static SVGA3dShaderRegType +translate_file(enum tgsi_file_type file) { switch (file) { case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; @@ -75,12 +72,17 @@ translate_file(unsigned file) case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; 
default: - assert( 0 ); + assert(!"svga: unexpected register file in translate_file()"); return SVGA3DREG_TEMP; } } +/** + * Translate a TGSI destination register to an SVGA3DShaderDestToken. + * \param insn the TGSI instruction + * \param idx which TGSI dest register to translate (usually (always?) zero) + */ static SVGA3dShaderDestToken translate_dst_register( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, @@ -95,6 +97,7 @@ translate_dst_register( struct svga_shader_emitter *emit, * Need to lookup a table built at decl time: */ dest = emit->output_map[reg->Register.Index]; + emit->num_output_writes++; break; default: @@ -161,7 +164,7 @@ scalar(struct src_register src, unsigned comp) static boolean svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -174,7 +177,7 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) static int svga_arl_adjustment( const struct svga_shader_emitter *emit ) { - int i; + unsigned i; for (i = 0; i < emit->num_arl_consts; ++i) { if (emit->arl_consts[i].arl_num == emit->current_arl) @@ -184,6 +187,9 @@ svga_arl_adjustment( const struct svga_shader_emitter *emit ) } +/** + * Translate a TGSI src register to a src_register. 
+ */ static struct src_register translate_src_register( const struct svga_shader_emitter *emit, const struct tgsi_full_src_register *reg ) @@ -280,8 +286,10 @@ static SVGA3dShaderDestToken get_temp( struct svga_shader_emitter *emit ) { int i = emit->nr_hw_temp + emit->internal_temp_count++; - assert(i < SVGA3D_TEMPREG_MAX); - i = MIN2(i, SVGA3D_TEMPREG_MAX - 1); + if (i >= SVGA3D_TEMPREG_MAX) { + debug_warn_once("svga: Too many temporary registers used in shader\n"); + i = SVGA3D_TEMPREG_MAX - 1; + } return dst_register( SVGA3DREG_TEMP, i ); } @@ -300,6 +308,9 @@ release_temp( struct svga_shader_emitter *emit, } +/** + * Release all temps. + */ static void reset_temp_regs(struct svga_shader_emitter *emit) { @@ -472,6 +483,9 @@ emit_repl(struct svga_shader_emitter *emit, } +/** + * Submit/emit an instruction with zero operands. + */ static boolean submit_op0(struct svga_shader_emitter *emit, SVGA3dShaderInstToken inst, @@ -482,6 +496,9 @@ submit_op0(struct svga_shader_emitter *emit, } +/** + * Submit/emit an instruction with one operand. + */ static boolean submit_op1(struct svga_shader_emitter *emit, SVGA3dShaderInstToken inst, @@ -493,6 +510,8 @@ submit_op1(struct svga_shader_emitter *emit, /** + * Submit/emit an instruction with two operands. + * * SVGA shaders may not refer to >1 constant register in a single * instruction. This function checks for that usage and inserts a * move to temporary if detected. @@ -543,6 +562,8 @@ submit_op2(struct svga_shader_emitter *emit, /** + * Submit/emit an instruction with three operands. + * * SVGA shaders may not refer to >1 constant register in a single * instruction. This function checks for that usage and inserts a * move to temporary if detected. @@ -613,6 +634,8 @@ submit_op3(struct svga_shader_emitter *emit, /** + * Submit/emit an instruction with four operands. + * * SVGA shaders may not refer to >1 constant register in a single * instruction. 
This function checks for that usage and inserts a * move to temporary if detected. @@ -644,6 +667,7 @@ submit_op4(struct svga_shader_emitter *emit, * in one slot at least: */ assert(type1 == SVGA3DREG_SAMPLER); + (void) type1; if (type0 == SVGA3DREG_CONST && ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || @@ -706,42 +730,10 @@ alias_src_dst(struct src_register src, } -static boolean -submit_lrp(struct svga_shader_emitter *emit, - SVGA3dShaderDestToken dst, - struct src_register src0, - struct src_register src1, - struct src_register src2) -{ - SVGA3dShaderDestToken tmp; - boolean need_dst_tmp = FALSE; - - /* The dst reg must be a temporary, and not be the same as src0 or src2 */ - if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || - alias_src_dst(src0, dst) || - alias_src_dst(src2, dst)) - need_dst_tmp = TRUE; - - if (need_dst_tmp) { - tmp = get_temp( emit ); - tmp.mask = dst.mask; - } - else { - tmp = dst; - } - - if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) - return FALSE; - - if (need_dst_tmp) { - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; - } - - return TRUE; -} - - +/** + * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I] + * instructions. + */ static boolean emit_def_const(struct svga_shader_emitter *emit, SVGA3dShaderConstType type, @@ -774,28 +766,9 @@ emit_def_const(struct svga_shader_emitter *emit, } if (!emit_instruction(emit, opcode) || - !svga_shader_emit_dwords( emit, def.values, Elements(def.values))) - return FALSE; - - return TRUE; -} - - -static boolean -create_zero_immediate( struct svga_shader_emitter *emit ) -{ - unsigned idx = emit->nr_hw_float_const++; - - /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate - * other useful vectors. 
- */ - if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, - idx, 0, 0.5, -1, 1 )) + !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values))) return FALSE; - emit->zero_immediate_idx = idx; - emit->created_zero_immediate = TRUE; - return TRUE; } @@ -871,30 +844,105 @@ get_vface( struct svga_shader_emitter *emit ) /** - * returns {0, 0, 0, 1} immediate + * Create/emit a "common" constant with values {0, 0.5, -1, 1}. + * We can swizzle this to produce other useful constants such as + * {0, 0, 0, 0}, {1, 1, 1, 1}, etc. + */ +static boolean +create_common_immediate( struct svga_shader_emitter *emit ) +{ + unsigned idx = emit->nr_hw_float_const++; + + /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate + * other useful vectors. + */ + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 0.0f, 0.5f, -1.0f, 1.0f )) + return FALSE; + emit->common_immediate_idx[0] = idx; + idx++; + + /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ + if (emit->key.vs.adjust_attrib_range) { + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 2.0f, 0.0f, 0.0f, 0.0f )) + return FALSE; + emit->common_immediate_idx[1] = idx; + } + else { + emit->common_immediate_idx[1] = -1; + } + + emit->created_common_immediate = TRUE; + + return TRUE; +} + + +/** + * Return swizzle/position for the given value in the "common" immediate. 
+ */ +static inline unsigned +common_immediate_swizzle(float value) +{ + if (value == 0.0f) + return TGSI_SWIZZLE_X; + else if (value == 0.5f) + return TGSI_SWIZZLE_Y; + else if (value == -1.0f) + return TGSI_SWIZZLE_Z; + else if (value == 1.0f) + return TGSI_SWIZZLE_W; + else { + assert(!"illegal value in common_immediate_swizzle"); + return TGSI_SWIZZLE_X; + } +} + + +/** + * Returns an immediate reg where all the terms are either 0, 0.5, -1 or 1 + */ +static struct src_register +get_immediate(struct svga_shader_emitter *emit, + float x, float y, float z, float w) +{ + unsigned sx = common_immediate_swizzle(x); + unsigned sy = common_immediate_swizzle(y); + unsigned sz = common_immediate_swizzle(z); + unsigned sw = common_immediate_swizzle(w); + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), + sx, sy, sz, sw); +} + + +/** + * returns {0, 0, 0, 0} immediate */ static struct src_register get_zero_immediate( struct svga_shader_emitter *emit ) { - assert(emit->created_zero_immediate); - assert(emit->zero_immediate_idx >= 0); + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->zero_immediate_idx), - 0, 0, 0, 3); + emit->common_immediate_idx[0]), + 0, 0, 0, 0); } /** - * returns {1, 1, 1, -1} immediate + * returns {1, 1, 1, 1} immediate */ static struct src_register -get_pos_neg_one_immediate( struct svga_shader_emitter *emit ) +get_one_immediate( struct svga_shader_emitter *emit ) { - assert(emit->created_zero_immediate); - assert(emit->zero_immediate_idx >= 0); + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->zero_immediate_idx), - 3, 3, 3, 2); + emit->common_immediate_idx[0]), + 3, 3, 3, 3); } @@ -904,13 +952,28 @@ get_pos_neg_one_immediate( struct svga_shader_emitter 
*emit ) static struct src_register get_half_immediate( struct svga_shader_emitter *emit ) { - assert(emit->created_zero_immediate); - assert(emit->zero_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx), + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 1, 1, 1, 1); } +/** + * returns {2, 2, 2, 2} immediate + */ +static struct src_register +get_two_immediate( struct svga_shader_emitter *emit ) +{ + /* Note we use the second common immediate here */ + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[1] >= 0); + return swizzle(src_register( SVGA3DREG_CONST, + emit->common_immediate_idx[1]), + 0, 0, 0, 0); +} + + /** * returns the loop const */ @@ -943,8 +1006,8 @@ get_fake_arl_const( struct svga_shader_emitter *emit ) /** - * Return the register which holds the current dimenions of the - * texture bound to the given sampler + * Return a register which holds the width and height of the texture + * currently bound to the given sampler. 
*/ static struct src_register get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) @@ -953,7 +1016,7 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) struct src_register reg; /* the width/height indexes start right after constants */ - idx = emit->key.fkey.tex[sampler_num].width_height_idx + + idx = emit->key.tex[sampler_num].width_height_idx + emit->info.file_max[TGSI_FILE_CONSTANT] + 1; reg = src_register( SVGA3DREG_CONST, idx ); @@ -993,11 +1056,10 @@ emit_if(struct svga_shader_emitter *emit, { struct src_register src0 = translate_src_register(emit, &insn->Src[0]); - struct src_register zero = get_zero_immediate( emit ); + struct src_register zero = get_zero_immediate(emit); SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); if_token.control = SVGA3DOPCOMPC_NE; - zero = scalar(zero, TGSI_SWIZZLE_X); if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { /* @@ -1020,20 +1082,20 @@ emit_if(struct svga_shader_emitter *emit, static boolean -emit_endif(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_else(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { - emit->dynamic_branching_level--; - - return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); + return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); } static boolean -emit_else(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_endif(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { - return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); + emit->dynamic_branching_level--; + + return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); } @@ -1112,7 +1174,7 @@ emit_div(struct svga_shader_emitter *emit, const struct src_register src1 = translate_src_register(emit, &insn->Src[1] ); SVGA3dShaderDestToken temp = get_temp( emit ); - int i; + unsigned i; /* For each enabled element, perform a RCP instruction. 
Note that * RCP is scalar in SVGA3D: @@ -1175,72 +1237,8 @@ emit_dp2(struct svga_shader_emitter *emit, /** - * Translate the following TGSI DPH instruction. - * DPH DST, SRC1, SRC2 - * To the following SVGA3D instruction sequence. - * DP3 TMP, SRC1, SRC2 - * ADD DST, TMP, SRC2.wwww + * Sine / Cosine helper function. */ -static boolean -emit_dph(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn ) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = - translate_src_register(emit, &insn->Src[1]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* DP3 TMP, SRC1, SRC2 */ - if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) - return FALSE; - - src1 = scalar(src1, TGSI_SWIZZLE_W); - - /* ADD DST, TMP, SRC2.wwww */ - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src( temp ), src1 )) - return FALSE; - - return TRUE; -} - - -/** - * Translate the following TGSI DST instruction. - * NRM DST, SRC - * To the following SVGA3D instruction sequence. 
- * DP3 TMP, SRC, SRC - * RSQ TMP, TMP - * MUL DST, SRC, TMP - */ -static boolean -emit_nrm(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = - translate_src_register(emit, &insn->Src[0]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* DP3 TMP, SRC, SRC */ - if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 )) - return FALSE; - - /* RSQ TMP, TMP */ - if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp ))) - return FALSE; - - /* MUL DST, SRC, TMP */ - if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, - src0, src( temp ))) - return FALSE; - - return TRUE; -} - - static boolean do_emit_sincos(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst, @@ -1251,27 +1249,8 @@ do_emit_sincos(struct svga_shader_emitter *emit, } -static boolean -emit_sincos(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register(emit, &insn->Src[0]); - SVGA3dShaderDestToken temp = get_temp( emit ); - - /* SCS TMP SRC */ - if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) - return FALSE; - - /* MOV DST TMP */ - if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) - return FALSE; - - return TRUE; -} - - /** + * Translate TGSI SIN instruction into: * SCS TMP SRC * MOV DST TMP.yyyy */ @@ -1297,7 +1276,9 @@ emit_sin(struct svga_shader_emitter *emit, return TRUE; } + /* + * Translate TGSI COS instruction into: * SCS TMP SRC * MOV DST TMP.xxxx */ @@ -1324,6 +1305,9 @@ emit_cos(struct svga_shader_emitter *emit, } +/** + * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction. 
+ */ static boolean emit_ssg(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -1341,9 +1325,8 @@ emit_ssg(struct svga_shader_emitter *emit, src( temp0 ), src( temp1 ) ); } - zero = get_zero_immediate( emit ); - one = scalar( zero, TGSI_SWIZZLE_W ); - zero = scalar( zero, TGSI_SWIZZLE_X ); + one = get_one_immediate(emit); + zero = get_zero_immediate(emit); /* CMP TMP0, SRC0, one, zero */ if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), @@ -1363,28 +1346,8 @@ emit_ssg(struct svga_shader_emitter *emit, /** - * ADD DST SRC0, negate(SRC0) + * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). */ -static boolean -emit_sub(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - - src1 = negate(src1); - - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src0, src1 )) - return FALSE; - - return TRUE; -} - - static boolean emit_kill_if(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -1426,15 +1389,15 @@ emit_kill_if(struct svga_shader_emitter *emit, /** - * unconditional kill + * Translate/emit unconditional kill instruction (usually found inside + * an IF/ELSE/ENDIF block). 
*/ static boolean emit_kill(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { SVGA3dShaderDestToken temp; - struct src_register one = scalar( get_zero_immediate( emit ), - TGSI_SWIZZLE_W ); + struct src_register one = get_one_immediate(emit); SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL ); /* texkill doesn't allow negation on the operand so lets move @@ -1461,7 +1424,8 @@ same_register(struct src_register r1, struct src_register r2) -/* Implement conditionals by initializing destination reg to 'fail', +/** + * Implement conditionals by initializing destination reg to 'fail', * then set predicate reg with UFOP_SETP, then move 'pass' to dest * based on predicate reg. * @@ -1471,7 +1435,7 @@ same_register(struct src_register r1, struct src_register r2) */ static boolean emit_conditional(struct svga_shader_emitter *emit, - unsigned compare_func, + enum pipe_compare_func compare_func, SVGA3dShaderDestToken dst, struct src_register src0, struct src_register src1, @@ -1479,8 +1443,7 @@ emit_conditional(struct svga_shader_emitter *emit, struct src_register fail) { SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); - SVGA3dShaderInstToken setp_token, mov_token; - setp_token = inst_token( SVGA3DOP_SETP ); + SVGA3dShaderInstToken setp_token; switch (compare_func) { case PIPE_FUNC_NEVER: @@ -1488,22 +1451,22 @@ emit_conditional(struct svga_shader_emitter *emit, dst, fail ); break; case PIPE_FUNC_LESS: - setp_token.control = SVGA3DOPCOMP_LT; + setp_token = inst_token_setp(SVGA3DOPCOMP_LT); break; case PIPE_FUNC_EQUAL: - setp_token.control = SVGA3DOPCOMP_EQ; + setp_token = inst_token_setp(SVGA3DOPCOMP_EQ); break; case PIPE_FUNC_LEQUAL: - setp_token.control = SVGA3DOPCOMP_LE; + setp_token = inst_token_setp(SVGA3DOPCOMP_LE); break; case PIPE_FUNC_GREATER: - setp_token.control = SVGA3DOPCOMP_GT; + setp_token = inst_token_setp(SVGA3DOPCOMP_GT); break; case PIPE_FUNC_NOTEQUAL: - setp_token.control = SVGA3DOPCOMPC_NE; + 
setp_token = inst_token_setp(SVGA3DOPCOMPC_NE); break; case PIPE_FUNC_GEQUAL: - setp_token.control = SVGA3DOPCOMP_GE; + setp_token = inst_token_setp(SVGA3DOPCOMP_GE); break; case PIPE_FUNC_ALWAYS: return submit_op1( emit, inst_token( SVGA3DOP_MOV ), @@ -1526,11 +1489,8 @@ emit_conditional(struct svga_shader_emitter *emit, src0, src1 )) return FALSE; - mov_token = inst_token( SVGA3DOP_MOV ); - /* MOV dst, fail */ - if (!submit_op1( emit, mov_token, dst, - fail )) + if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail)) return FALSE; /* MOV dst, pass (predicated) @@ -1538,18 +1498,25 @@ emit_conditional(struct svga_shader_emitter *emit, * Note that the predicate reg (and possible modifiers) is passed * as the first source argument. */ - mov_token.predicated = 1; - if (!submit_op2( emit, mov_token, dst, - src( pred_reg ), pass )) + if (!submit_op2(emit, + inst_token_predicated(SVGA3DOP_MOV), dst, + src(pred_reg), pass)) return FALSE; return TRUE; } +/** + * Helper for emitting 'selection' commands. Basically: + * if (src0 OP src1) + * dst = 1.0; + * else + * dst = 0.0; + */ static boolean emit_select(struct svga_shader_emitter *emit, - unsigned compare_func, + enum pipe_compare_func compare_func, SVGA3dShaderDestToken dst, struct src_register src0, struct src_register src1 ) @@ -1577,21 +1544,17 @@ emit_select(struct svga_shader_emitter *emit, { struct src_register one, zero; /* zero immediate is 0,0,0,1 */ - zero = get_zero_immediate( emit ); - one = scalar( zero, TGSI_SWIZZLE_W ); - zero = scalar( zero, TGSI_SWIZZLE_X ); + zero = get_zero_immediate(emit); + one = get_one_immediate(emit); - return emit_conditional( - emit, - compare_func, - dst, - src0, - src1, - one, zero); + return emit_conditional(emit, compare_func, dst, src0, src1, one, zero); } } +/** + * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction. 
+ */ static boolean emit_select_op(struct svga_shader_emitter *emit, unsigned compare, @@ -1608,7 +1571,8 @@ emit_select_op(struct svga_shader_emitter *emit, /** - * Translate TGSI CMP instruction. + * Translate TGSI CMP instruction. Component-wise: + * dst = (src0 < 0.0) ? src1 : src2 */ static boolean emit_cmp(struct svga_shader_emitter *emit, @@ -1623,8 +1587,7 @@ emit_cmp(struct svga_shader_emitter *emit, translate_src_register(emit, &insn->Src[2] ); if (emit->unit == PIPE_SHADER_VERTEX) { - struct src_register zero = - scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X); + struct src_register zero = get_zero_immediate(emit); /* We used to simulate CMP with SLT+LRP. But that didn't work when * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed * because it involves a CMP to handle the 0 case. @@ -1644,7 +1607,7 @@ emit_cmp(struct svga_shader_emitter *emit, /** - * Translate texture instructions to SVGA3D representation. + * Translate/emit 2-operand (coord, sampler) texture instructions. 
*/ static boolean emit_tex2(struct svga_shader_emitter *emit, @@ -1681,7 +1644,7 @@ emit_tex2(struct svga_shader_emitter *emit, texcoord = translate_src_register( emit, &insn->Src[0] ); sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[sampler.base.num].unnormalized || + if (emit->key.tex[sampler.base.num].unnormalized || emit->dynamic_branching_level > 0) tmp = get_temp( emit ); @@ -1691,7 +1654,7 @@ emit_tex2(struct svga_shader_emitter *emit, if (emit->dynamic_branching_level > 0 && inst.op == SVGA3DOP_TEX && SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { - struct src_register zero = get_zero_immediate( emit ); + struct src_register zero = get_zero_immediate(emit); /* MOV tmp, texcoord */ if (!submit_op1( emit, @@ -1704,7 +1667,7 @@ emit_tex2(struct svga_shader_emitter *emit, if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), writemask( tmp, TGSI_WRITEMASK_W ), - scalar( zero, TGSI_SWIZZLE_X ))) + zero )) return FALSE; texcoord = src( tmp ); @@ -1713,7 +1676,7 @@ emit_tex2(struct svga_shader_emitter *emit, /* Explicit normalization of texcoords: */ - if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + if (emit->key.tex[sampler.base.num].unnormalized) { struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ @@ -1729,7 +1692,7 @@ emit_tex2(struct svga_shader_emitter *emit, /** - * Translate texture instructions to SVGA3D representation. + * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions. */ static boolean emit_tex4(struct svga_shader_emitter *emit, @@ -1763,7 +1726,8 @@ emit_tex4(struct svga_shader_emitter *emit, /** - * Emit texture swizzle code. + * Emit texture swizzle code. We do this here since SVGA samplers don't + * directly support swizzles. 
*/ static boolean emit_tex_swizzle(struct svga_shader_emitter *emit, @@ -1777,15 +1741,15 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; unsigned srcSwizzle[4]; unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; - int i; + unsigned i; /* build writemasks and srcSwizzle terms */ for (i = 0; i < 4; i++) { - if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) { + if (swizzleIn[i] == PIPE_SWIZZLE_0) { srcSwizzle[i] = TGSI_SWIZZLE_X + i; zeroWritemask |= (1 << i); } - else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) { + else if (swizzleIn[i] == PIPE_SWIZZLE_1) { srcSwizzle[i] = TGSI_SWIZZLE_X + i; oneWritemask |= (1 << i); } @@ -1813,7 +1777,7 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), writemask(dst, zeroWritemask), - scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X))) + get_zero_immediate(emit))) return FALSE; } @@ -1822,7 +1786,7 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), writemask(dst, oneWritemask), - scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W))) + get_one_immediate(emit))) return FALSE; } @@ -1830,6 +1794,9 @@ emit_tex_swizzle(struct svga_shader_emitter *emit, } +/** + * Translate/emit a TGSI texture sample instruction. 
+ */ static boolean emit_tex(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -1845,16 +1812,16 @@ emit_tex(struct svga_shader_emitter *emit, const unsigned unit = src1.base.num; /* check for shadow samplers */ - boolean compare = (emit->key.fkey.tex[unit].compare_mode == + boolean compare = (emit->key.tex[unit].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE); /* texture swizzle */ - boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED || - emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN || - emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE || - emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA); + boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X || + emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y || + emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z || + emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W); - boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE; + boolean saturate = insn->Instruction.Saturate; /* If doing compare processing or tex swizzle or saturation, we need to put * the fetched color into a temporary so it can be used as a source later on. 
@@ -1919,7 +1886,7 @@ emit_tex(struct svga_shader_emitter *emit, /* Compare texture sample value against R component of texcoord */ if (!emit_select(emit, - emit->key.fkey.tex[unit].compare_func, + emit->key.tex[unit].compare_func, writemask( dst2, TGSI_WRITEMASK_XYZ ), r_coord, tex_src_x)) @@ -1927,8 +1894,7 @@ emit_tex(struct svga_shader_emitter *emit, } if (dst.mask & TGSI_WRITEMASK_W) { - struct src_register one = - scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); + struct src_register one = get_one_immediate(emit); if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), writemask( dst2, TGSI_WRITEMASK_W ), @@ -1946,10 +1912,10 @@ emit_tex(struct svga_shader_emitter *emit, /* swizzle from tex_result to dst (handles saturation too, if any) */ emit_tex_swizzle(emit, dst, src(tex_result), - emit->key.fkey.tex[unit].swizzle_r, - emit->key.fkey.tex[unit].swizzle_g, - emit->key.fkey.tex[unit].swizzle_b, - emit->key.fkey.tex[unit].swizzle_a); + emit->key.tex[unit].swizzle_r, + emit->key.tex[unit].swizzle_g, + emit->key.tex[unit].swizzle_b, + emit->key.tex[unit].swizzle_a); } return TRUE; @@ -1957,8 +1923,8 @@ emit_tex(struct svga_shader_emitter *emit, static boolean -emit_bgnloop2(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_bgnloop(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); @@ -1973,8 +1939,8 @@ emit_bgnloop2(struct svga_shader_emitter *emit, static boolean -emit_endloop2(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +emit_endloop(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) { SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); @@ -1984,6 +1950,9 @@ emit_endloop2(struct svga_shader_emitter *emit, } +/** + * Translate/emit TGSI BREAK (out of loop) instruction. 
+ */ static boolean emit_brk(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -1993,9 +1962,13 @@ emit_brk(struct svga_shader_emitter *emit, } +/** + * Emit simple instruction which operates on one scalar value (not + * a vector). Ex: LG2, RCP, RSQ. + */ static boolean emit_scalar_op1(struct svga_shader_emitter *emit, - unsigned opcode, + SVGA3dShaderOpCodeType opcode, const struct tgsi_full_instruction *insn) { SVGA3dShaderInstToken inst; @@ -2011,9 +1984,13 @@ emit_scalar_op1(struct svga_shader_emitter *emit, } +/** + * Translate/emit a simple instruction (one which has no special-case + * code) such as ADD, MUL, MIN, MAX. + */ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, - unsigned opcode, + SVGA3dShaderOpCodeType opcode, const struct tgsi_full_instruction *insn) { const struct tgsi_full_src_register *src = insn->Src; @@ -2045,6 +2022,32 @@ emit_simple_instruction(struct svga_shader_emitter *emit, } +/** + * TGSI_OPCODE_MOVE is only special-cased here to detect the + * svga_fragment_shader::constant_color_output case. + */ +static boolean +emit_mov(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + const struct tgsi_full_src_register *src = &insn->Src[0]; + const struct tgsi_full_dst_register *dst = &insn->Dst[0]; + + if (emit->unit == PIPE_SHADER_FRAGMENT && + dst->Register.File == TGSI_FILE_OUTPUT && + dst->Register.Index == 0 && + src->Register.File == TGSI_FILE_CONSTANT && + !src->Register.Indirect) { + emit->constant_color_output = TRUE; + } + + return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); +} + + +/** + * Translate/emit TGSI DDX, DDY instructions. 
+ */ static boolean emit_deriv(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) @@ -2052,7 +2055,6 @@ emit_deriv(struct svga_shader_emitter *emit, if (emit->dynamic_branching_level > 0 && insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) { - struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); @@ -2062,13 +2064,13 @@ emit_deriv(struct svga_shader_emitter *emit, if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, - scalar(zero, TGSI_SWIZZLE_X))) + get_zero_immediate(emit))) return FALSE; return TRUE; } else { - unsigned opcode; + SVGA3dShaderOpCodeType opcode; const struct tgsi_full_src_register *reg = &insn->Src[0]; SVGA3dShaderInstToken inst; SVGA3dShaderDestToken dst; @@ -2104,6 +2106,11 @@ emit_deriv(struct svga_shader_emitter *emit, } +/** + * Translate/emit ARL (Address Register Load) instruction. Used to + * move a value into the special 'address' register. Used to implement + * indirect/variable indexing into arrays. + */ static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -2165,53 +2172,38 @@ emit_pow(struct svga_shader_emitter *emit, } +/** + * Emit a LRP (linear interpolation) instruction. 
+ */ static boolean -emit_xpd(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) +submit_lrp(struct svga_shader_emitter *emit, + SVGA3dShaderDestToken dst, + struct src_register src0, + struct src_register src1, + struct src_register src2) { - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - const struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); + SVGA3dShaderDestToken tmp; boolean need_dst_tmp = FALSE; - /* XPD can only output to a temporary */ - if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) - need_dst_tmp = TRUE; - - /* The dst reg must not be the same as src0 or src1*/ - if (alias_src_dst(src0, dst) || - alias_src_dst(src1, dst)) + /* The dst reg must be a temporary, and not be the same as src0 or src2 */ + if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || + alias_src_dst(src0, dst) || + alias_src_dst(src2, dst)) need_dst_tmp = TRUE; if (need_dst_tmp) { - SVGA3dShaderDestToken tmp = get_temp( emit ); - - /* Obey DX9 restrictions on mask: - */ - tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ; - - if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1)) - return FALSE; - - if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) - return FALSE; + tmp = get_temp( emit ); + tmp.mask = dst.mask; } else { - if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1)) - return FALSE; + tmp = dst; } - /* Need to emit 1.0 to dst.w? 
- */ - if (dst.mask & TGSI_WRITEMASK_W) { - struct src_register zero = get_zero_immediate( emit ); + if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) + return FALSE; - if (!submit_op1(emit, - inst_token( SVGA3DOP_MOV ), - writemask(dst, TGSI_WRITEMASK_W), - zero)) + if (need_dst_tmp) { + if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) return FALSE; } @@ -2219,6 +2211,9 @@ emit_xpd(struct svga_shader_emitter *emit, } +/** + * Translate/emit LRP (Linear Interpolation) instruction. + */ static boolean emit_lrp(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -2234,7 +2229,9 @@ emit_lrp(struct svga_shader_emitter *emit, return submit_lrp(emit, dst, src0, src1, src2); } - +/** + * Translate/emit DST (Distance function) instruction. + */ static boolean emit_dst_insn(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -2256,7 +2253,6 @@ emit_dst_insn(struct svga_shader_emitter *emit, emit, &insn->Src[0] ); const struct src_register src1 = translate_src_register( emit, &insn->Src[1] ); - struct src_register zero = get_zero_immediate( emit ); boolean need_tmp = FALSE; if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || @@ -2276,7 +2272,7 @@ emit_dst_insn(struct svga_shader_emitter *emit, if (tmp.mask & TGSI_WRITEMASK_XW) { if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), writemask(tmp, TGSI_WRITEMASK_XW ), - scalar( zero, 3 ))) + get_one_immediate(emit))) return FALSE; } @@ -2320,7 +2316,6 @@ emit_exp(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); - struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken fraction; if (dst.mask & TGSI_WRITEMASK_Y) @@ -2371,7 +2366,7 @@ emit_exp(struct svga_shader_emitter *emit, if (dst.mask & TGSI_WRITEMASK_W) { if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), writemask(dst, 
TGSI_WRITEMASK_W), - scalar( zero, TGSI_SWIZZLE_W ) )) + get_one_immediate(emit))) return FALSE; } @@ -2379,6 +2374,9 @@ emit_exp(struct svga_shader_emitter *emit, } +/** + * Translate/emit LIT (Lighting helper) instruction. + */ static boolean emit_lit(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -2412,7 +2410,6 @@ emit_lit(struct svga_shader_emitter *emit, SVGA3dShaderDestToken tmp = get_temp( emit ); const struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); - struct src_register zero = get_zero_immediate( emit ); /* tmp = pow(src.y, src.w) */ @@ -2438,14 +2435,8 @@ emit_lit(struct svga_shader_emitter *emit, */ { SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); - SVGA3dShaderInstToken setp_token, mov_token; struct src_register predsrc; - setp_token = inst_token( SVGA3DOP_SETP ); - mov_token = inst_token( SVGA3DOP_MOV ); - - setp_token.control = SVGA3DOPCOMP_GT; - /* D3D vs GL semantics: */ if (0) @@ -2454,14 +2445,16 @@ emit_lit(struct svga_shader_emitter *emit, predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */ /* SETP src0.xxyy, GT, {0}.x */ - if (!submit_op2( emit, setp_token, pred_reg, + if (!submit_op2( emit, + inst_token_setp(SVGA3DOPCOMP_GT), + pred_reg, predsrc, - swizzle(zero, 0, 0, 0, 0) )) + get_zero_immediate(emit))) return FALSE; /* MOV dst, fail */ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, - swizzle(zero, 3, 0, 0, 3 ))) + get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f))) return FALSE; /* MOV dst.yz, tmp (predicated) @@ -2470,8 +2463,8 @@ emit_lit(struct svga_shader_emitter *emit, * as the first source argument. 
*/ if (dst.mask & TGSI_WRITEMASK_YZ) { - mov_token.predicated = 1; - if (!submit_op2( emit, mov_token, + if (!submit_op2( emit, + inst_token_predicated(SVGA3DOP_MOV), writemask(dst, TGSI_WRITEMASK_YZ), src( pred_reg ), src( tmp ) )) return FALSE; @@ -2518,7 +2511,6 @@ emit_log(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); struct src_register src0 = translate_src_register( emit, &insn->Src[0] ); - struct src_register zero = get_zero_immediate( emit ); SVGA3dShaderDestToken abs_tmp; struct src_register abs_src0; SVGA3dShaderDestToken log2_abs; @@ -2610,7 +2602,7 @@ emit_log(struct svga_shader_emitter *emit, if (dst.mask & TGSI_WRITEMASK_W) { if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), writemask(dst, TGSI_WRITEMASK_W), - scalar( zero, TGSI_SWIZZLE_W ) )) + get_one_immediate(emit))) return FALSE; } @@ -2692,6 +2684,9 @@ emit_trunc_round(struct svga_shader_emitter *emit, } +/** + * Translate/emit "begin subroutine" instruction/marker/label. + */ static boolean emit_bgnsub(struct svga_shader_emitter *emit, unsigned position, @@ -2718,6 +2713,9 @@ emit_bgnsub(struct svga_shader_emitter *emit, } +/** + * Translate/emit subroutine call instruction. + */ static boolean emit_call(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) @@ -2730,7 +2728,7 @@ emit_call(struct svga_shader_emitter *emit, break; } - if (emit->nr_labels == Elements(emit->label)) + if (emit->nr_labels == ARRAY_SIZE(emit->label)) return FALSE; if (i == emit->nr_labels) { @@ -2759,7 +2757,9 @@ emit_end(struct svga_shader_emitter *emit) } - +/** + * Translate any TGSI instruction to SVGA. 
+ */ static boolean svga_emit_instruction(struct svga_shader_emitter *emit, unsigned position, @@ -2811,21 +2811,12 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_DP2: return emit_dp2( emit, insn ); - case TGSI_OPCODE_DPH: - return emit_dph( emit, insn ); - - case TGSI_OPCODE_NRM: - return emit_nrm( emit, insn ); - case TGSI_OPCODE_COS: return emit_cos( emit, insn ); case TGSI_OPCODE_SIN: return emit_sin( emit, insn ); - case TGSI_OPCODE_SCS: - return emit_sincos( emit, insn ); - case TGSI_OPCODE_END: /* TGSI always finishes the main func with an END */ return emit_end( emit ); @@ -2854,9 +2845,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SLE: return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); - case TGSI_OPCODE_SUB: - return emit_sub( emit, insn ); - case TGSI_OPCODE_POW: return emit_pow( emit, insn ); @@ -2892,7 +2880,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, /* These aren't actually used by any of the frontends we care * about: */ - case TGSI_OPCODE_CLAMP: case TGSI_OPCODE_AND: case TGSI_OPCODE_OR: case TGSI_OPCODE_I2F: @@ -2910,15 +2897,12 @@ svga_emit_instruction(struct svga_shader_emitter *emit, return emit_endif( emit, insn ); case TGSI_OPCODE_BGNLOOP: - return emit_bgnloop2( emit, insn ); + return emit_bgnloop( emit, insn ); case TGSI_OPCODE_ENDLOOP: - return emit_endloop2( emit, insn ); + return emit_endloop( emit, insn ); case TGSI_OPCODE_BRK: return emit_brk( emit, insn ); - case TGSI_OPCODE_XPD: - return emit_xpd( emit, insn ); - case TGSI_OPCODE_KILL: return emit_kill( emit, insn ); @@ -2934,9 +2918,13 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SSG: return emit_ssg( emit, insn ); + case TGSI_OPCODE_MOV: + return emit_mov( emit, insn ); + default: { - unsigned opcode = translate_opcode(insn->Instruction.Opcode); + SVGA3dShaderOpCodeType opcode = + translate_opcode(insn->Instruction.Opcode); if (opcode == SVGA3DOP_LAST_INST) return FALSE; 
@@ -2950,9 +2938,13 @@ svga_emit_instruction(struct svga_shader_emitter *emit, } +/** + * Translate/emit a TGSI IMMEDIATE declaration. + * An immediate vector is a constant that's hard-coded into the shader. + */ static boolean svga_emit_immediate(struct svga_shader_emitter *emit, - struct tgsi_full_immediate *imm) + const struct tgsi_full_immediate *imm) { static const float id[4] = {0,0,0,1}; float value[4]; @@ -2964,6 +2956,9 @@ svga_emit_immediate(struct svga_shader_emitter *emit, value[i] = util_is_inf_or_nan(f) ? 0.0f : f; } + /* If the immediate has less than four values, fill in the remaining + * positions from id={0,0,0,1}. + */ for ( ; i < 4; i++ ) value[i] = id[i]; @@ -2990,10 +2985,13 @@ make_immediate(struct svga_shader_emitter *emit, } +/** + * Emit special VS instructions at top of shader. + */ static boolean emit_vs_preamble(struct svga_shader_emitter *emit) { - if (!emit->key.vkey.need_prescale) { + if (!emit->key.vs.need_prescale) { if (!make_immediate( emit, 0, 0, .5, .5, &emit->imm_0055)) return FALSE; @@ -3003,6 +3001,9 @@ emit_vs_preamble(struct svga_shader_emitter *emit) } +/** + * Emit special PS instructions at top of shader. + */ static boolean emit_ps_preamble(struct svga_shader_emitter *emit) { @@ -3040,6 +3041,9 @@ emit_ps_preamble(struct svga_shader_emitter *emit) } +/** + * Emit special PS instructions at end of shader. + */ static boolean emit_ps_postamble(struct svga_shader_emitter *emit) { @@ -3064,9 +3068,8 @@ emit_ps_postamble(struct svga_shader_emitter *emit) * logicop workaround. 
*/ if (emit->unit == PIPE_SHADER_FRAGMENT && - emit->key.fkey.white_fragments) { - struct src_register one = scalar( get_zero_immediate( emit ), - TGSI_SWIZZLE_W ); + emit->key.fs.white_fragments) { + struct src_register one = get_one_immediate(emit); if (!submit_op1( emit, inst_token(SVGA3DOP_MOV), @@ -3074,6 +3077,15 @@ emit_ps_postamble(struct svga_shader_emitter *emit) one )) return FALSE; } + else if (emit->unit == PIPE_SHADER_FRAGMENT && + i < emit->key.fs.write_color0_to_n_cbufs) { + /* Write temp color output [0] to true output [i] */ + if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), + emit->true_color_output[i], + src(emit->temp_color_output[0]))) { + return FALSE; + } + } else { if (!submit_op1( emit, inst_token(SVGA3DOP_MOV), @@ -3088,6 +3100,9 @@ emit_ps_postamble(struct svga_shader_emitter *emit) } +/** + * Emit special VS instructions at end of shader. + */ static boolean emit_vs_postamble(struct svga_shader_emitter *emit) { @@ -3107,7 +3122,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit) /* Need to perform various manipulations on vertex position to cope * with the different GL and D3D clip spaces. */ - if (emit->key.vkey.need_prescale) { + if (emit->key.vs.need_prescale) { SVGA3dShaderDestToken temp_pos = emit->temp_pos; SVGA3dShaderDestToken depth = emit->depth_pos; SVGA3dShaderDestToken pos = emit->true_pos; @@ -3190,6 +3205,7 @@ emit_vs_postamble(struct svga_shader_emitter *emit) /** * For the pixel shader: emit the code which chooses the front * or back face color depending on triangle orientation. + * This happens at the top of the fragment shader. 
* * 0: IF VFACE :4 * 1: COLOR = FrontColor; @@ -3205,14 +3221,14 @@ emit_light_twoside(struct svga_shader_emitter *emit) struct src_register back[2]; SVGA3dShaderDestToken color[2]; int count = emit->internal_color_count; - int i; + unsigned i; SVGA3dShaderInstToken if_token; if (count == 0) return TRUE; vface = get_vface( emit ); - zero = get_zero_immediate( emit ); + zero = get_zero_immediate(emit); /* Can't use get_temp() to allocate the color reg as such * temporaries will be reclaimed after each instruction by the call @@ -3234,13 +3250,11 @@ emit_light_twoside(struct svga_shader_emitter *emit) if_token = inst_token( SVGA3DOP_IFC ); - if (emit->key.fkey.front_ccw) + if (emit->key.fs.front_ccw) if_token.control = SVGA3DOPCOMP_LT; else if_token.control = SVGA3DOPCOMP_GT; - zero = scalar(zero, TGSI_SWIZZLE_X); - if (!(emit_instruction( emit, if_token ) && emit_src( emit, vface ) && emit_src( emit, zero ) )) @@ -3267,18 +3281,18 @@ emit_light_twoside(struct svga_shader_emitter *emit) /** + * Emit special setup code for the front/back face register in the FS. 
* 0: SETP_GT TEMP, VFACE, 0 * where TEMP is a fake frontface register */ static boolean emit_frontface(struct svga_shader_emitter *emit) { - struct src_register vface, zero; + struct src_register vface; SVGA3dShaderDestToken temp; struct src_register pass, fail; vface = get_vface( emit ); - zero = get_zero_immediate( emit ); /* Can't use get_temp() to allocate the fake frontface reg as such * temporaries will be reclaimed after each instruction by the call @@ -3287,16 +3301,16 @@ emit_frontface(struct svga_shader_emitter *emit) temp = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); - if (emit->key.fkey.front_ccw) { - pass = scalar( zero, TGSI_SWIZZLE_X ); - fail = scalar( zero, TGSI_SWIZZLE_W ); + if (emit->key.fs.front_ccw) { + pass = get_zero_immediate(emit); + fail = get_one_immediate(emit); } else { - pass = scalar( zero, TGSI_SWIZZLE_W ); - fail = scalar( zero, TGSI_SWIZZLE_X ); + pass = get_one_immediate(emit); + fail = get_zero_immediate(emit); } if (!emit_conditional(emit, PIPE_FUNC_GREATER, - temp, vface, scalar( zero, TGSI_SWIZZLE_X ), + temp, vface, get_zero_immediate(emit), pass, fail)) return FALSE; @@ -3316,8 +3330,6 @@ emit_frontface(struct svga_shader_emitter *emit) static boolean emit_inverted_texcoords(struct svga_shader_emitter *emit) { - struct src_register zero = get_zero_immediate(emit); - struct src_register pos_neg_one = get_pos_neg_one_immediate( emit ); unsigned inverted_texcoords = emit->inverted_texcoords; while (inverted_texcoords) { @@ -3325,20 +3337,20 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) assert(emit->inverted_texcoords & (1 << unit)); - assert(unit < Elements(emit->ps_true_texcoord)); + assert(unit < ARRAY_SIZE(emit->ps_true_texcoord)); - assert(unit < Elements(emit->ps_inverted_texcoord_input)); + assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input)); assert(emit->ps_inverted_texcoord_input[unit] - < Elements(emit->input_map)); + < ARRAY_SIZE(emit->input_map)); /* inverted = coord * (1, -1, 1, 1) + 
(0, 1, 0, 0) */ if (!submit_op3(emit, inst_token(SVGA3DOP_MAD), dst(emit->ps_inverted_texcoord[unit]), emit->ps_true_texcoord[unit], - swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */ - swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */ + get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f), + get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f))) return FALSE; /* Reassign the input_map entry to the new texcoord register */ @@ -3352,16 +3364,112 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) } +/** + * Emit code to adjust vertex shader inputs/attributes: + * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs). + * - Set attrib W component = 1. + */ static boolean -needs_to_create_zero( struct svga_shader_emitter *emit ) +emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) +{ + unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | + emit->key.vs.adjust_attrib_w_1); + + while (adjust_mask) { + /* Adjust vertex attrib range and/or set W component = 1 */ + const unsigned index = u_bit_scan(&adjust_mask); + struct src_register tmp; + + /* allocate a temp reg */ + tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); + emit->nr_hw_temp++; + + if (emit->key.vs.adjust_attrib_range & (1 << index)) { + /* The vertex input/attribute is supposed to be a signed value in + * the range [-1,1] but we actually fetched/converted it to the + * range [0,1]. This most likely happens when the app specifies a + * signed byte attribute but we interpreted it as unsigned bytes. + * See also svga_translate_vertex_format(). + * + * Here, we emit some extra instructions to adjust + * the attribute values from [0,1] to [-1,1]. + * + * The adjustment we implement is: + * new_attrib = attrib * 2.0; + * if (attrib >= 0.5) + * new_attrib = new_attrib - 2.0; + * This isn't exactly right (it's off by a bit or so) but close enough. 
+ */ + SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); + + /* tmp = attrib * 2.0 */ + if (!submit_op2(emit, + inst_token(SVGA3DOP_MUL), + dst(tmp), + emit->input_map[index], + get_two_immediate(emit))) + return FALSE; + + /* pred = (attrib >= 0.5) */ + if (!submit_op2(emit, + inst_token_setp(SVGA3DOPCOMP_GE), + pred_reg, + emit->input_map[index], /* vert attrib */ + get_half_immediate(emit))) /* 0.5 */ + return FALSE; + + /* sub(pred) tmp, tmp, 2.0 */ + if (!submit_op3(emit, + inst_token_predicated(SVGA3DOP_SUB), + dst(tmp), + src(pred_reg), + tmp, + get_two_immediate(emit))) + return FALSE; + } + else { + /* just copy the vertex input attrib to the temp register */ + if (!submit_op1(emit, + inst_token(SVGA3DOP_MOV), + dst(tmp), + emit->input_map[index])) + return FALSE; + } + + if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { + /* move 1 into W position of tmp */ + if (!submit_op1(emit, + inst_token(SVGA3DOP_MOV), + writemask(dst(tmp), TGSI_WRITEMASK_W), + get_one_immediate(emit))) + return FALSE; + } + + /* Reassign the input_map entry to the new tmp register */ + emit->input_map[index] = tmp; + } + + return TRUE; +} + + +/** + * Determine if we need to create the "common" immediate value which is + * used for generating useful vector constants such as {0,0,0,0} and + * {1,1,1,1}. + * We could just do this all the time except that we want to conserve + * registers whenever possible. 
+ */ +static boolean +needs_to_create_common_immediate(const struct svga_shader_emitter *emit) { unsigned i; if (emit->unit == PIPE_SHADER_FRAGMENT) { - if (emit->key.fkey.light_twoside) + if (emit->key.fs.light_twoside) return TRUE; - if (emit->key.fkey.white_fragments) + if (emit->key.fs.white_fragments) return TRUE; if (emit->emit_frontface) @@ -3375,25 +3483,27 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->inverted_texcoords) return TRUE; - /* look for any PIPE_SWIZZLE_ZERO/ONE terms */ - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || - emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) + /* look for any PIPE_SWIZZLE_0/ONE terms */ + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W || + emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W) return TRUE; } - for (i = 0; i < emit->key.fkey.num_textures; i++) { - if (emit->key.fkey.tex[i].compare_mode + for (i = 0; i < emit->key.num_textures; i++) { + if (emit->key.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) return TRUE; } } - - if (emit->unit == PIPE_SHADER_VERTEX) { + else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) + return TRUE; } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || @@ -3409,7 +3519,6 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || - emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1) return TRUE; @@ -3417,15 
+3526,18 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) } +/** + * Do we need to create a looping constant? + */ static boolean -needs_to_create_loop_const( struct svga_shader_emitter *emit ) +needs_to_create_loop_const(const struct svga_shader_emitter *emit) { return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); } static boolean -needs_to_create_arl_consts( struct svga_shader_emitter *emit ) +needs_to_create_arl_consts(const struct svga_shader_emitter *emit) { return (emit->num_arl_consts > 0); } @@ -3435,7 +3547,7 @@ static boolean pre_parse_add_indirect( struct svga_shader_emitter *emit, int num, int current_arl) { - int i; + unsigned i; assert(num < 0); for (i = 0; i < emit->num_arl_consts; ++i) { @@ -3523,8 +3635,8 @@ pre_parse_tokens( struct svga_shader_emitter *emit, static boolean svga_shader_emit_helpers(struct svga_shader_emitter *emit) { - if (needs_to_create_zero( emit )) { - create_zero_immediate( emit ); + if (needs_to_create_common_immediate( emit )) { + create_common_immediate( emit ); } if (needs_to_create_loop_const( emit )) { create_loop_const( emit ); @@ -3534,10 +3646,13 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) } if (emit->unit == PIPE_SHADER_FRAGMENT) { + if (!svga_shader_emit_samplers_decl( emit )) + return FALSE; + if (!emit_ps_preamble( emit )) return FALSE; - if (emit->key.fkey.light_twoside) { + if (emit->key.fs.light_twoside) { if (!emit_light_twoside( emit )) return FALSE; } @@ -3550,20 +3665,54 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) return FALSE; } } + else { + assert(emit->unit == PIPE_SHADER_VERTEX); + if (emit->key.vs.adjust_attrib_range || + emit->key.vs.adjust_attrib_w_1) { + if (!emit_adjusted_vertex_attribs(emit)) { + return FALSE; + } + } + } return TRUE; } +/** + * This is the main entrypoint into the TGSI instruction translater. + * Translate TGSI shader tokens into an SVGA shader. 
+ */ boolean svga_shader_emit_instructions(struct svga_shader_emitter *emit, const struct tgsi_token *tokens) { struct tgsi_parse_context parse; + const struct tgsi_token *new_tokens = NULL; boolean ret = TRUE; boolean helpers_emitted = FALSE; unsigned line_nr = 0; + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { + unsigned unit; + + new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, + TGSI_FILE_INPUT); + + if (new_tokens) { + /* Setup texture state for stipple */ + emit->sampler_target[unit] = TGSI_TEXTURE_2D; + emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; + emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; + emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; + emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; + + emit->pstipple_sampler_unit = unit; + + tokens = new_tokens; + } + } + tgsi_parse_init( &parse, tokens ); emit->internal_imm_count = 0; @@ -3630,5 +3779,9 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, done: tgsi_parse_free( &parse ); + if (new_tokens) { + tgsi_free_tokens(new_tokens); + } + return ret; }