X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fstate_trackers%2Fnine%2Fnine_shader.c;h=754f5af6b8e068eb435dedb437d43722948d0df5;hb=248833ff4072da4f3362dc9f0eab84eb015f3964;hp=b0dd07f820d8bbbca9ebf941f60d950409ecc047;hpb=70a523818f08f97b6d51f156dca383cfcab8efab;p=mesa.git diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index b0dd07f820d..754f5af6b8e 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -27,6 +27,7 @@ #include "nine_debug.h" #include "nine_state.h" +#include "util/macros.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" @@ -42,7 +43,7 @@ struct shader_translator; typedef HRESULT (*translate_instruction_func)(struct shader_translator *); -static INLINE const char *d3dsio_to_string(unsigned opcode); +static inline const char *d3dsio_to_string(unsigned opcode); #define NINED3D_SM1_VS 0xfffe @@ -238,7 +239,7 @@ struct sm1_dst_param BYTE type; }; -static INLINE void +static inline void assert_replicate_swizzle(const struct ureg_src *reg) { assert(reg->SwizzleY == reg->SwizzleX && @@ -495,6 +496,7 @@ struct shader_translator struct sm1_local_const lconstb[NINE_MAX_CONST_B]; boolean indirect_const_access; + boolean failure; struct nine_shader_info *info; @@ -503,6 +505,9 @@ struct shader_translator #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX) #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT) +#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? 
NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3) + +#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} static void sm1_read_semantic(struct shader_translator *, struct sm1_semantic *); @@ -523,7 +528,10 @@ static boolean tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) { INT i; - assert(index >= 0 && index < (NINE_MAX_CONST_F * 2)); + if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) { + tx->failure = TRUE; + return FALSE; + } for (i = 0; i < tx->num_lconstf; ++i) { if (tx->lconstf[i].idx == index) { *src = tx->lconstf[i].reg; @@ -535,7 +543,10 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) { - assert(index >= 0 && index < NINE_MAX_CONST_I); + if (index < 0 || index >= NINE_MAX_CONST_I) { + tx->failure = TRUE; + return FALSE; + } if (tx->lconsti[index].idx == index) *src = tx->lconsti[index].reg; return tx->lconsti[index].idx == index; @@ -543,7 +554,10 @@ tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) { - assert(index >= 0 && index < NINE_MAX_CONST_B); + if (index < 0 || index >= NINE_MAX_CONST_B) { + tx->failure = TRUE; + return FALSE; + } if (tx->lconstb[index].idx == index) *src = tx->lconstb[index].reg; return tx->lconstb[index].idx == index; @@ -554,9 +568,8 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) { unsigned n; - /* Anno1404 sets out of range constants. 
*/ - assert(index >= 0 && index < (NINE_MAX_CONST_F * 2)); - if (index >= NINE_MAX_CONST_F) + FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER) + if (IS_VS && index >= NINE_MAX_CONST_F_SHADER) WARN("lconstf index %i too high, indirect access won't work\n", index); for (n = 0; n < tx->num_lconstf; ++n) @@ -579,7 +592,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) static void tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) { - assert(index >= 0 && index < NINE_MAX_CONST_I); + FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I) tx->lconsti[index].idx = index; tx->lconsti[index].reg = tx->native_integers ? ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : @@ -588,29 +601,32 @@ tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) static void tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) { - assert(index >= 0 && index < NINE_MAX_CONST_B); + FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B) tx->lconstb[index].idx = index; tx->lconstb[index].reg = tx->native_integers ? ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : ureg_imm1f(tx->ureg, b ? 
1.0f : 0.0f); } -static INLINE struct ureg_dst +static inline struct ureg_dst tx_scratch(struct shader_translator *tx) { - assert(tx->num_scratch < Elements(tx->regs.t)); + if (tx->num_scratch >= Elements(tx->regs.t)) { + tx->failure = TRUE; + return tx->regs.t[0]; + } if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); return tx->regs.t[tx->num_scratch++]; } -static INLINE struct ureg_dst +static inline struct ureg_dst tx_scratch_scalar(struct shader_translator *tx) { return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); } -static INLINE struct ureg_src +static inline struct ureg_src tx_src_scalar(struct ureg_dst dst) { struct ureg_src src = ureg_src(dst); @@ -620,7 +636,7 @@ tx_src_scalar(struct ureg_dst dst) return src; } -static INLINE void +static inline void tx_temp_alloc(struct shader_translator *tx, INT idx) { assert(idx >= 0); @@ -638,7 +654,7 @@ tx_temp_alloc(struct shader_translator *tx, INT idx) tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); } -static INLINE void +static inline void tx_addr_alloc(struct shader_translator *tx, INT idx) { assert(idx == 0); @@ -648,7 +664,7 @@ tx_addr_alloc(struct shader_translator *tx, INT idx) tx->regs.a0 = ureg_DECL_temporary(tx->ureg); } -static INLINE void +static inline void tx_pred_alloc(struct shader_translator *tx, INT idx) { assert(idx == 0); @@ -656,7 +672,7 @@ tx_pred_alloc(struct shader_translator *tx, INT idx) tx->regs.p = ureg_DECL_predicate(tx->ureg); } -static INLINE void +static inline void tx_texcoord_alloc(struct shader_translator *tx, INT idx) { assert(IS_PS); @@ -666,7 +682,7 @@ tx_texcoord_alloc(struct shader_translator *tx, INT idx) TGSI_INTERPOLATE_PERSPECTIVE); } -static INLINE unsigned * +static inline unsigned * tx_bgnloop(struct shader_translator *tx) { tx->loop_depth++; @@ -676,7 +692,7 @@ tx_bgnloop(struct shader_translator *tx) return &tx->loop_labels[tx->loop_depth - 1]; } -static INLINE unsigned * +static inline 
unsigned * tx_endloop(struct shader_translator *tx) { assert(tx->loop_depth); @@ -725,7 +741,7 @@ tx_get_loopal(struct shader_translator *tx) return ureg_src_undef(); } -static INLINE unsigned * +static inline unsigned * tx_cond(struct shader_translator *tx) { assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); @@ -733,14 +749,14 @@ tx_cond(struct shader_translator *tx) return &tx->cond_labels[tx->cond_depth - 1]; } -static INLINE unsigned * +static inline unsigned * tx_elsecond(struct shader_translator *tx) { assert(tx->cond_depth); return &tx->cond_labels[tx->cond_depth - 1]; } -static INLINE void +static inline void tx_endcond(struct shader_translator *tx) { assert(tx->cond_depth); @@ -749,7 +765,7 @@ tx_endcond(struct shader_translator *tx) ureg_get_instruction_number(tx->ureg)); } -static INLINE struct ureg_dst +static inline struct ureg_dst nine_ureg_dst_register(unsigned file, int index) { return ureg_dst(ureg_src_register(file, index)); @@ -1082,7 +1098,7 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) if (ureg_dst_is_undef(tx->regs.oDepth)) tx->regs.oDepth = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, - TGSI_WRITEMASK_Z); + TGSI_WRITEMASK_Z, 0, 1); dst = tx->regs.oDepth; /* XXX: must write .z component */ break; case D3DSPR_PREDICATE: @@ -1224,7 +1240,7 @@ NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, co #define VNOTSUPPORTED 0, 0 #define V(maj, min) (((maj) << 8) | (min)) -static INLINE const char * +static inline const char * d3dsio_to_string( unsigned opcode ) { static const char *names[] = { @@ -1641,7 +1657,7 @@ DECL_SPECIAL(IF) return D3D_OK; } -static INLINE unsigned +static inline unsigned sm1_insn_flags_to_tgsi_setop(BYTE flags) { switch (flags) { @@ -1708,7 +1724,7 @@ static const char *sm1_declusage_names[] = [D3DDECLUSAGE_SAMPLE] = "SAMPLE" }; -static INLINE unsigned +static inline unsigned sm1_to_nine_declusage(struct sm1_semantic *dcl) { return 
nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); @@ -1817,7 +1833,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) -static INLINE unsigned +static inline unsigned d3dstt_to_tgsi_tex(BYTE sampler_type) { switch (sampler_type) { @@ -1830,7 +1846,7 @@ d3dstt_to_tgsi_tex(BYTE sampler_type) return TGSI_TEXTURE_UNKNOWN; } } -static INLINE unsigned +static inline unsigned d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) { switch (sampler_type) { @@ -1843,7 +1859,7 @@ d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) return TGSI_TEXTURE_UNKNOWN; } } -static INLINE unsigned +static inline unsigned ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) { switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { @@ -1868,7 +1884,7 @@ sm1_sampler_type_name(BYTE sampler_type) } } -static INLINE unsigned +static inline unsigned nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) { switch (sem->Name) { @@ -1950,7 +1966,7 @@ DECL_SPECIAL(DCL) tx->info->position_t = TRUE; assert(sem.reg.idx < Elements(tx->regs.o)); tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( - ureg, tgsi.Name, tgsi.Index, sem.reg.mask); + ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); if (tgsi.Name == TGSI_SEMANTIC_PSIZE) tx->regs.oPts = tx->regs.o[sem.reg.idx]; @@ -1963,12 +1979,13 @@ DECL_SPECIAL(DCL) ureg, tgsi.Name, tgsi.Index, nine_tgsi_to_interp_mode(&tgsi), 0, /* cylwrap */ - sem.reg.mod & NINED3DSPDM_CENTROID); + sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1); } else if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ /* FragColor or FragDepth */ assert(sem.reg.mask != 0); - ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask); + ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, + 0, 1); } } return D3D_OK; @@ -2025,6 +2042,23 
@@ DECL_SPECIAL(LOG) return D3D_OK; } +DECL_SPECIAL(LIT) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst tmp = tx_scratch(tx); + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); + ureg_LIT(ureg, tmp, src); + /* d3d9 LIT is the same as gallium LIT. One difference is that d3d9 + * states that dst.z is 0 when src.y <= 0. Gallium definition can assign + * it 0^0 if src.w=0, whose value is driver dependent. */ + ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), + ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); + ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); + return D3D_OK; +} + DECL_SPECIAL(NRM) { struct ureg_program *ureg = tx->ureg; @@ -2279,7 +2313,8 @@ DECL_SPECIAL(TEXM3x2DEPTH) ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); /* replace the depth for depth testing with the result */ - tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z); + tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_WRITEMASK_Z, 0, 1); ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); /* note that we write nothing to the destination, since it's disallowed to use it afterward */ return D3D_OK; @@ -2377,7 +2412,8 @@ DECL_SPECIAL(TEXDEPTH) ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), r5r, ureg_imm1f(ureg, 1.0f)); /* replace the depth for depth testing with the result */ - tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z); + tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_WRITEMASK_Z, 0, 1); ureg_MOV(ureg, tx->regs.oDepth, r5r); return D3D_OK; @@ -2461,8 +2497,8 @@ DECL_SPECIAL(TEXLDD) 
tx_src_param(tx, &tx->insn.src[2]), tx_src_param(tx, &tx->insn.src[3]) }; - assert(tx->insn.src[3].idx >= 0 && - tx->insn.src[3].idx < Elements(tx->sampler_targets)); + assert(tx->insn.src[1].idx >= 0 && + tx->insn.src[1].idx < Elements(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); @@ -2477,8 +2513,8 @@ DECL_SPECIAL(TEXLDL) tx_src_param(tx, &tx->insn.src[0]), tx_src_param(tx, &tx->insn.src[1]) }; - assert(tx->insn.src[3].idx >= 0 && - tx->insn.src[3].idx < Elements(tx->sampler_targets)); + assert(tx->insn.src[1].idx >= 0 && + tx->insn.src[1].idx < Elements(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; ureg_TXL(tx->ureg, dst, target, src[0], src[1]); @@ -2528,7 +2564,7 @@ struct sm1_op_info inst_table[] = _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ - _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */ + _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ @@ -2649,7 +2685,7 @@ create_op_info_map(struct shader_translator *tx) } } -static INLINE HRESULT +static inline HRESULT NineTranslateInstruction_Generic(struct shader_translator *tx) { struct ureg_dst dst[1]; @@ -2667,19 +2703,19 @@ NineTranslateInstruction_Generic(struct shader_translator *tx) return D3D_OK; } -static INLINE DWORD +static inline DWORD TOKEN_PEEK(struct shader_translator *tx) { return *(tx->parse); } -static INLINE DWORD +static inline DWORD TOKEN_NEXT(struct shader_translator *tx) { return *(tx->parse)++; } -static INLINE void +static inline void TOKEN_JUMP(struct 
shader_translator *tx) { if (tx->parse_next && tx->parse != tx->parse_next) { @@ -2688,7 +2724,7 @@ TOKEN_JUMP(struct shader_translator *tx) } } -static INLINE boolean +static inline boolean sm1_parse_eof(struct shader_translator *tx) { return TOKEN_PEEK(tx) == NINED3DSP_END; @@ -2763,7 +2799,7 @@ sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel) *rel = (1 << 31) | ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) | ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | - (D3DSP_NOSWIZZLE << D3DSP_SWIZZLE_SHIFT); + D3DSP_NOSWIZZLE; else *rel = TOKEN_NEXT(tx); } @@ -3027,7 +3063,7 @@ tx_dtor(struct shader_translator *tx) FREE(tx); } -static INLINE unsigned +static inline unsigned tgsi_processor_from_type(unsigned shader_type) { switch (shader_type) { @@ -3050,6 +3086,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) HRESULT hr = D3D_OK; const unsigned processor = tgsi_processor_from_type(info->type); unsigned s, slot_max; + unsigned max_const_f; user_assert(processor != ~0, D3DERR_INVALIDCALL); @@ -3097,10 +3134,17 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); } - while (!sm1_parse_eof(tx)) + while (!sm1_parse_eof(tx) && !tx->failure) sm1_parse_instruction(tx); tx->parse++; /* for byte_size */ + if (tx->failure) { + ERR("Encountered buggy shader\n"); + ureg_destroy(tx->ureg); + hr = D3DERR_INVALIDCALL; + goto out; + } + if (IS_PS && (tx->version.major < 2) && tx->num_temp) { ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0), ureg_src(tx->regs.r[0])); @@ -3173,14 +3217,21 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) hr = D3D_OK; } + /* r500 */ + if (info->const_float_slots > device->max_vs_const_f && + (info->const_int_slots || info->const_bool_slots)) + ERR("Overlapping constant slots. 
The shader is likely to be buggy\n"); + + if (tx->indirect_const_access) /* vs only */ info->const_float_slots = device->max_vs_const_f; + max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; slot_max = info->const_bool_slots > 0 ? - device->max_vs_const_f + NINE_MAX_CONST_I - + info->const_bool_slots : + max_const_f + NINE_MAX_CONST_I + + DIV_ROUND_UP(info->const_bool_slots, 4) : info->const_int_slots > 0 ? - device->max_vs_const_f + info->const_int_slots : + max_const_f + info->const_int_slots : info->const_float_slots; info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */