From: Axel Davy Date: Sat, 17 Sep 2016 10:14:58 +0000 (+0200) Subject: st/nine: Add support for swvp constants in shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1604efa6fda9b780e8537a131ad77f3e83e5a67a;p=mesa.git st/nine: Add support for swvp constants in shaders swvp has relaxed limits (more nested loops, etc). In particular it enables more constants. Signed-off-by: Axel Davy --- diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 140226ec58b..2d4e323a4ae 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -432,11 +432,7 @@ struct sm1_local_const { INT idx; struct ureg_src reg; - union { - boolean b; - float f[4]; - int32_t i[4]; - } imm; + float f[4]; /* for indirect addressing of float constants */ }; struct shader_translator @@ -507,8 +503,10 @@ struct shader_translator struct sm1_local_const *lconstf; unsigned num_lconstf; - struct sm1_local_const lconsti[NINE_MAX_CONST_I]; - struct sm1_local_const lconstb[NINE_MAX_CONST_B]; + struct sm1_local_const *lconsti; + unsigned num_lconsti; + struct sm1_local_const *lconstb; + unsigned num_lconstb; boolean indirect_const_access; boolean failure; @@ -542,6 +540,7 @@ static boolean tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) { INT i; + if (index < 0 || index >= tx->num_constf_allowed) { tx->failure = TRUE; return FALSE; @@ -557,24 +556,36 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) { + int i; + if (index < 0 || index >= tx->num_consti_allowed) { tx->failure = TRUE; return FALSE; } - if (tx->lconsti[index].idx == index) - *src = tx->lconsti[index].reg; - return tx->lconsti[index].idx == index; + for (i = 0; i < tx->num_lconsti; ++i) { + if (tx->lconsti[i].idx == index) { + *src = tx->lconsti[i].reg; + return TRUE; + } + } + return 
FALSE; } static boolean tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) { + int i; + if (index < 0 || index >= tx->num_constb_allowed) { tx->failure = TRUE; return FALSE; } - if (tx->lconstb[index].idx == index) - *src = tx->lconstb[index].reg; - return tx->lconstb[index].idx == index; + for (i = 0; i < tx->num_lconstb; ++i) { + if (tx->lconstb[i].idx == index) { + *src = tx->lconstb[i].reg; + return TRUE; + } + } + return FALSE; } static void @@ -599,23 +610,55 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) tx->lconstf[n].idx = index; tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); - memcpy(tx->lconstf[n].imm.f, f, sizeof(tx->lconstf[n].imm.f)); + memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); } static void tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) { + unsigned n; + FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) - tx->lconsti[index].idx = index; - tx->lconsti[index].reg = tx->native_integers ? + + for (n = 0; n < tx->num_lconsti; ++n) + if (tx->lconsti[n].idx == index) + break; + if (n == tx->num_lconsti) { + if ((n % 8) == 0) { + tx->lconsti = REALLOC(tx->lconsti, + (n + 0) * sizeof(tx->lconsti[0]), + (n + 8) * sizeof(tx->lconsti[0])); + assert(tx->lconsti); + } + tx->num_lconsti++; + } + + tx->lconsti[n].idx = index; + tx->lconsti[n].reg = tx->native_integers ? ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); } static void tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) { + unsigned n; + FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) - tx->lconstb[index].idx = index; - tx->lconstb[index].reg = tx->native_integers ? 
+ + for (n = 0; n < tx->num_lconstb; ++n) + if (tx->lconstb[n].idx == index) + break; + if (n == tx->num_lconstb) { + if ((n % 8) == 0) { + tx->lconstb = REALLOC(tx->lconstb, + (n + 0) * sizeof(tx->lconstb[0]), + (n + 8) * sizeof(tx->lconstb[0])); + assert(tx->lconstb); + } + tx->num_lconstb++; + } + + tx->lconstb[n].idx = index; + tx->lconstb[n].reg = tx->native_integers ? ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f); } @@ -942,7 +985,26 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) if (param->rel || !tx_lconstf(tx, &src, param->idx)) { if (!param->rel) nine_info_mark_const_f_used(tx->info, param->idx); - src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); + /* swvp constant handling: we use two buffers + * to fit all the float constants. The special handling + * doesn't need to be elsewhere, because all the instructions + * accessing the constants directly are VS1, and swvp + * is VS >= 2 */ + if (IS_VS && tx->info->swvp_on) { + if (!param->rel) { + if (param->idx < 4096) { + src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); + src = ureg_src_dimension(src, 0); + } else { + src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096); + src = ureg_src_dimension(src, 1); + } + } else { + src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */ + src = ureg_src_dimension(src, 0); + } + } else + src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); } if (!IS_VS && tx->version.major < 2) { /* ps 1.X clamps constants */ @@ -964,8 +1026,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) assert(!param->rel); if (!tx_lconsti(tx, &src, param->idx)) { nine_info_mark_const_i_used(tx->info, param->idx); - src = ureg_src_register(TGSI_FILE_CONSTANT, - tx->info->const_i_base + param->idx); + if (IS_VS && tx->info->swvp_on) { + src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); + src = ureg_src_dimension(src, 2); + } else + 
src = ureg_src_register(TGSI_FILE_CONSTANT, + tx->info->const_i_base + param->idx); } break; case D3DSPR_CONSTBOOL: @@ -974,8 +1040,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) char r = param->idx / 4; char s = param->idx & 3; nine_info_mark_const_b_used(tx->info, param->idx); - src = ureg_src_register(TGSI_FILE_CONSTANT, - tx->info->const_b_base + r); + if (IS_VS && tx->info->swvp_on) { + src = ureg_src_register(TGSI_FILE_CONSTANT, r); + src = ureg_src_dimension(src, 3); + } else + src = ureg_src_register(TGSI_FILE_CONSTANT, + tx->info->const_b_base + r); src = ureg_swizzle(src, s, s, s, s); } break; @@ -3353,8 +3423,6 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) struct shader_translator *tx; HRESULT hr = D3D_OK; const unsigned processor = info->type; - unsigned s, slot_max; - unsigned max_const_f; user_assert(processor != ~0, D3DERR_INVALIDCALL); @@ -3411,6 +3479,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) tx->num_constb_allowed = NINE_MAX_CONST_B; } + if (IS_VS && tx->version.major >= 2 && info->swvp_on) { + tx->num_constf_allowed = 8192; + tx->num_consti_allowed = 2048; + tx->num_constb_allowed = 2048; + } + /* VS must always write position. Declare it here to make it the 1st output. * (Some drivers like nv50 are buggy and rely on that.) 
*/ @@ -3485,7 +3559,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) k = i; } indices[n] = tx->lconstf[k].idx; - memcpy(&data[n * 4], &tx->lconstf[k].imm.f[0], 4 * sizeof(float)); + memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); tx->lconstf[k].idx = INT_MAX; } @@ -3520,25 +3594,35 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) /* r500 */ if (info->const_float_slots > device->max_vs_const_f && - (info->const_int_slots || info->const_bool_slots)) + (info->const_int_slots || info->const_bool_slots) && + (!IS_VS || !info->swvp_on)) ERR("Overlapping constant slots. The shader is likely to be buggy\n"); if (tx->indirect_const_access) /* vs only */ info->const_float_slots = device->max_vs_const_f; - max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; - slot_max = info->const_bool_slots > 0 ? - max_const_f + NINE_MAX_CONST_I - + DIV_ROUND_UP(info->const_bool_slots, 4) : - info->const_int_slots > 0 ? - max_const_f + info->const_int_slots : - info->const_float_slots; + if (!IS_VS || !info->swvp_on) { + unsigned s, slot_max; + unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; + + slot_max = info->const_bool_slots > 0 ? + max_const_f + NINE_MAX_CONST_I + + DIV_ROUND_UP(info->const_bool_slots, 4) : + info->const_int_slots > 0 ? 
+ max_const_f + info->const_int_slots : + info->const_float_slots; - info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ + info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ - for (s = 0; s < slot_max; s++) - ureg_DECL_constant(tx->ureg, s); + for (s = 0; s < slot_max; s++) + ureg_DECL_constant(tx->ureg, s); + } else { + ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); + ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); + ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); + ureg_DECL_constant2D(tx->ureg, 0, 511, 3); + } if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { unsigned count; diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index 7f2f57f0d34..092ae634d7d 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -76,6 +76,8 @@ struct nine_shader_info struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */ uint8_t bumpenvmat_needed; + + boolean swvp_on; }; static inline void diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index 31135758ff1..bc09a413fab 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -63,6 +63,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.fog_enable = 0; info.point_size_min = 0; info.point_size_max = 0; + info.swvp_on = false; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -167,6 +168,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]); info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]); + info.swvp_on = false; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr))