unsigned num_lconstb;
boolean slots_used[NINE_MAX_CONST_ALL];
+ unsigned *slot_map;
unsigned num_slots;
boolean indirect_const_access;
static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
{
struct ureg_src src;
+
+ if (tx->slot_map)
+ idx = tx->slot_map[idx];
/* vswp constant handling: we use two buffers
* to fit all the float constants. The special handling
* doesn't need to be elsewhere, because all the instructions
src = ureg_src_dimension(src, 2);
} else {
unsigned slot_idx = tx->info->const_i_base + idx;
+ if (tx->slot_map)
+ slot_idx = tx->slot_map[slot_idx];
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
src = ureg_src_dimension(src, 0);
tx->slots_used[slot_idx] = TRUE;
src = ureg_src_dimension(src, 3);
} else {
unsigned slot_idx = tx->info->const_b_base + r;
+ if (tx->slot_map)
+ slot_idx = tx->slot_map[slot_idx];
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
src = ureg_src_dimension(src, 0);
tx->slots_used[slot_idx] = TRUE;
static void
tx_dtor(struct shader_translator *tx)
{
+ if (tx->slot_map)
+ FREE(tx->slot_map);
if (tx->num_inst_labels)
FREE(tx->inst_labels);
FREE(tx->lconstf);
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
}
+static void parse_shader(struct shader_translator *tx) /* Parse all instructions of the shader held by tx, then
+ * append the epilogue: fog handling, window-space position property,
+ * point size clamp, optional viewport transform and the final END.
+ * Returns straight after parsing, with no epilogue, if tx->failure is set. */
+{
+ struct nine_shader_info *info = tx->info;
+ /* Consume instructions until end of shader or the first failure. */
+ while (!sm1_parse_eof(tx) && !tx->failure)
+ sm1_parse_instruction(tx);
+ tx->parse++; /* for byte_size */
+ /* Nothing more is emitted for a shader that failed to parse. */
+ if (tx->failure)
+ return;
+ /* Pixel shaders below version 3 get a fog stage appended. Below version 2
+  * the colour source is r[0] and bit 0 of info->rt_mask is set; otherwise
+  * the colour source is oCol[0]. */
+ if (IS_PS && tx->version.major < 3) {
+ if (tx->version.major < 2) {
+ assert(tx->num_temp); /* there must be color output */
+ info->rt_mask |= 0x1;
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
+ } else {
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
+ }
+ }
+ /* Vertex shaders below version 3 that left oFog undefined while
+  * info->fog_enable is set: declare a FOG output and write 0.0 to X. */
+ if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
+ tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
+ ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
+ }
+ /* Set the window-space position property when info->position_t is set. */
+ if (info->position_t)
+ ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
+ /* Vertex shaders with a defined oPts register: declare the PSIZE output,
+  * apply MAX with point_size_min (result written back to regs.oPts), then
+  * MIN with point_size_max (result written to the declared output), and
+  * flag info->point_size. */
+ if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
+ struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
+ ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
+ ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
+ info->point_size = TRUE;
+ }
+ /* Append the viewport transform when info->process_vertices is set. */
+ if (info->process_vertices)
+ shader_add_vs_viewport_transform(tx);
+ /* Emit the final END; this is always the last instruction emitted here. */
+ ureg_END(tx->ureg);
+}
+
/* Translate the shader described by info.
 *
 * Only part of the body is visible in this excerpt. From the visible lines:
 * the shader is parsed through parse_shader(); when the end of the function
 * is reached without a goto, info->const_ranges and info->byte_size are
 * filled in. The return value is hr, which starts as D3D_OK and is set to
 * E_OUTOFMEMORY on the allocation failures handled below. Every exit through
 * the out label runs tx_dtor(tx).
 * NOTE(review): user_assert presumably returns D3DERR_INVALIDCALL when
 * processor is ~0 - confirm against the macro definition.
 */
HRESULT
nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
{
HRESULT hr = D3D_OK;
const unsigned processor = info->type;
struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
+ unsigned *const_ranges = NULL; /* (first slot, count) pairs; handed to info at the end, freed at out otherwise */
user_assert(processor != ~0, D3DERR_INVALIDCALL);
DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
tx->version.major, tx->version.minor);
- while (!sm1_parse_eof(tx) && !tx->failure)
- sm1_parse_instruction(tx);
- tx->parse++; /* for byte_size */
+ parse_shader(tx); /* first pass: parse and emit the epilogue with the original constant slots */
if (tx->failure) {
/* For VS shaders, we print the warning later,
goto out;
}
- if (IS_PS && tx->version.major < 3) {
- if (tx->version.major < 2) {
- assert(tx->num_temp); /* there must be color output */
- info->rt_mask |= 0x1;
- shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
- } else {
- shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
- }
- }
+ /* Recompile after compacting constant slots if possible */
+ if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0 && 0) { /* NOTE(review): the trailing && 0 makes this condition always false, so the block below is currently dead code */
+ unsigned *slot_map;
+ unsigned c;
+ int i, j, num_ranges, prev;
- if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
- tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
- ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
- }
+ DBG("Recompiling shader for constant compaction\n");
+ ureg_destroy(tx->ureg); /* discard the ureg state that the first parse_shader call emitted into */
- if (info->position_t)
- ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
+ if (tx->num_inst_labels)
+ FREE(tx->inst_labels);
+ FREE(tx->lconstf);
+ FREE(tx->regs.r);
+ /* NOTE(review): the pointers freed above are not reset here, and the
+  * goto out paths below still reach tx_dtor(tx), which frees
+  * tx->inst_labels and tx->lconstf as well - confirm this cannot free
+  * them twice (depends on what FREE and tx_ctor do, not visible here). */
+ num_ranges = 0; /* counting pass: number of runs of consecutive used slots */
+ prev = -2; /* -2 so that slot 0 is never treated as adjacent to a previous used slot */
+ for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
+ if (tx->slots_used[i]) {
+ if (prev != i - 1)
+ num_ranges++;
+ prev = i;
+ }
+ }
+ slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
+ const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
+ if (!slot_map || !const_ranges) {
+ hr = E_OUTOFMEMORY;
+ goto out; /* NOTE(review): if only const_ranges failed, slot_map is still a live local and nothing visible at out releases it - possible leak */
+ }
+ c = 0; /* next compacted slot index */
+ j = -1; /* index of the current range; becomes 0 at the first used slot */
+ prev = -2;
+ for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
+ if (tx->slots_used[i]) {
+ if (prev != i - 1)
+ j++;
+ /* Initialize first slot of the range */
+ if (!const_ranges[2*j+1])
+ const_ranges[2*j] = i;
+ const_ranges[2*j+1]++; /* const_ranges[2*j] is the first slot of range j, const_ranges[2*j+1] its length */
+ prev = i;
+ slot_map[i] = c++; /* original slot i maps to the next compacted index; entries for unused slots are never written */
+ }
+ }
- if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
- struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
- ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
- ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
- info->point_size = TRUE;
+ if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
+ hr = E_OUTOFMEMORY;
+ goto out; /* NOTE(review): slot_map has not been stored in tx yet at this point, so it appears to leak on this path */
+ }
+ tx->slot_map = slot_map; /* from here on tx owns slot_map; tx_dtor frees it */
+ parse_shader(tx); /* second pass: same shader, constant slots remapped through slot_map */
+ assert(!tx->failure);
+#if !defined(NDEBUG)
+ i = 0; /* debug-only check: the range lengths must add up to tx->num_slots */
+ j = 0;
+ while (const_ranges[i*2+1] != 0) {
+ j += const_ranges[i*2+1];
+ i++;
+ }
+ assert(j == tx->num_slots);
+#endif
}
- if (info->process_vertices)
- shader_add_vs_viewport_transform(tx);
-
- ureg_END(tx->ureg);
-
/* record local constants */
if (tx->num_lconstf && tx->indirect_const_access) {
struct nine_range *ranges;
goto out;
}
+ info->const_ranges = const_ranges; /* ownership moves to info (NULL when no compaction happened) */
+ const_ranges = NULL; /* cleared so the cleanup at out does not free what info now holds */
info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
out:
+ if (const_ranges)
+ FREE(const_ranges);
tx_dtor(tx);
return hr;
}